List of usage examples for org.apache.hadoop.mapreduce.TaskAttemptContext#getConfiguration()
public Configuration getConfiguration();
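TaskAttemptContext.getConfiguration() returns the job Configuration for the current task attempt; the examples below call it from getRecordWriter() and initialize() to read job settings. As a minimal sketch, the same call is also available from a Mapper's Context (which is a TaskAttemptContext); the property name example.delimiter and the class below are hypothetical.

    import java.io.IOException;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Mapper;

    // Hypothetical mapper: reads a job-level setting through the task attempt context.
    public class DelimiterMapper extends Mapper<LongWritable, Text, Text, LongWritable> {
        private String delimiter;

        @Override
        protected void setup(Context context) throws IOException, InterruptedException {
            Configuration conf = context.getConfiguration();
            delimiter = conf.get("example.delimiter", "\t"); // job setting with a default
        }

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            for (String field : value.toString().split(delimiter)) {
                context.write(new Text(field), new LongWritable(1));
            }
        }
    }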
From source file:jp.ac.u.tokyo.m.pig.udf.store.FreeEncodingPigTextOutputFormat.java
License:Apache License
@SuppressWarnings("rawtypes")
@Override
public RecordWriter<WritableComparable, Tuple> getRecordWriter(TaskAttemptContext job)
        throws IOException, InterruptedException {
    Configuration conf = job.getConfiguration();
    boolean isCompressed = getCompressOutput(job);
    CompressionCodec codec = null;
    String extension = "";
    if (isCompressed) {
        Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(job, GzipCodec.class);
        codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, conf);
        extension = codec.getDefaultExtension();
    }
    Path file = getDefaultWorkFile(job, extension);
    FileSystem fs = file.getFileSystem(conf);
    if (!isCompressed) {
        FSDataOutputStream fileOut = fs.create(file, false);
        return new FreeEncodingPigLineRecordWriter(fileOut, mFieldDelimiter, mEncoding);
    } else {
        FSDataOutputStream fileOut = fs.create(file, false);
        return new FreeEncodingPigLineRecordWriter(
                new DataOutputStream(codec.createOutputStream(fileOut)), mFieldDelimiter, mEncoding);
    }
}
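The writer above reads the job Configuration to decide whether to compress the output and which codec to use. A driver could enable this through the standard FileOutputFormat helpers; the sketch below (job name and output path are placeholders) belongs inside a driver's run() or main() method.

    // Standard MRv2 driver settings that getCompressOutput()/getOutputCompressorClass()
    // in the record writer above will read back via job.getConfiguration().
    Job job = Job.getInstance(new Configuration(), "store-example");
    FileOutputFormat.setCompressOutput(job, true);
    FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
    FileOutputFormat.setOutputPath(job, new Path("/tmp/out"));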
From source file:kafka.bridge.hadoop.KafkaOutputFormat.java
License:Apache License
@Override
public RecordWriter<NullWritable, W> getRecordWriter(TaskAttemptContext context)
        throws IOException, InterruptedException {
    Path outputPath = getOutputPath(context);
    if (outputPath == null)
        throw new IllegalArgumentException("no kafka output url specified");
    URI uri = outputPath.toUri();
    Configuration job = context.getConfiguration();

    final String topic = uri.getPath().substring(1); // ignore the initial '/' in the path
    final int queueSize = job.getInt("kafka.output.queue_size", KAFKA_QUEUE_SIZE);
    final int timeout = job.getInt("kafka.output.connect_timeout", KAFKA_PRODUCER_CONNECT_TIMEOUT);
    final int interval = job.getInt("kafka.output.reconnect_interval", KAFKA_PRODUCER_RECONNECT_INTERVAL);
    final int bufSize = job.getInt("kafka.output.bufsize", KAFKA_PRODUCER_BUFFER_SIZE);
    final int maxSize = job.getInt("kafka.output.max_msgsize", KAFKA_PRODUCER_MAX_MESSAGE_SIZE);

    job.set("kafka.output.server", String.format("%s:%d", uri.getHost(), uri.getPort()));
    job.set("kafka.output.topic", topic);
    job.setInt("kafka.output.queue_size", queueSize);
    job.setInt("kafka.output.connect_timeout", timeout);
    job.setInt("kafka.output.reconnect_interval", interval);
    job.setInt("kafka.output.bufsize", bufSize);
    job.setInt("kafka.output.max_msgsize", maxSize);

    if (uri.getHost().isEmpty())
        throw new IllegalArgumentException("missing kafka server");
    if (uri.getPath().isEmpty())
        throw new IllegalArgumentException("missing kafka topic");

    Properties props = new Properties();
    props.setProperty("host", uri.getHost());
    props.setProperty("port", Integer.toString(uri.getPort()));
    props.setProperty("buffer.size", Integer.toString(bufSize));
    props.setProperty("connect.timeout.ms", Integer.toString(timeout));
    props.setProperty("reconnect.interval", Integer.toString(interval));
    props.setProperty("max.message.size", Integer.toString(maxSize));

    SyncProducer producer = new SyncProducer(new SyncProducerConfig(props));
    return new KafkaRecordWriter<W>(producer, topic, queueSize);
}
From source file:kafka.bridge.hadoop2.KafkaOutputFormat.java
License:Apache License
@Override
public RecordWriter<K, V> getRecordWriter(TaskAttemptContext context)
        throws IOException, InterruptedException {
    Path outputPath = getOutputPath(context);
    if (outputPath == null)
        throw new KafkaException("no kafka output url specified");
    URI uri = URI.create(outputPath.toString());
    Configuration job = context.getConfiguration();
    Properties props = new Properties();
    String topic;

    props.putAll(kafkaConfigMap); // inject default configuration
    for (Map.Entry<String, String> m : job) { // handle any overrides
        if (!m.getKey().startsWith(KAFKA_CONFIG_PREFIX))
            continue;
        if (m.getKey().equals(KAFKA_URL))
            continue;

        String kafkaKeyName = m.getKey().substring(KAFKA_CONFIG_PREFIX.length() + 1);
        props.setProperty(kafkaKeyName, m.getValue()); // set Kafka producer property
    }

    // inject Kafka producer props back into jobconf for easier debugging
    for (Map.Entry<Object, Object> m : props.entrySet()) {
        job.set(KAFKA_CONFIG_PREFIX + "." + m.getKey().toString(), m.getValue().toString());
    }

    // KafkaOutputFormat specific parameters
    final int queueBytes = job.getInt(KAFKA_CONFIG_PREFIX + ".queue.bytes", KAFKA_QUEUE_BYTES);

    if (uri.getScheme().equals("kafka")) {
        // using the direct broker list
        // URL: kafka://<kafka host>/<topic>
        // e.g. kafka://kafka-server:9000,kafka-server2:9000/foobar
        String brokerList = uri.getAuthority();
        props.setProperty("metadata.broker.list", brokerList);
        job.set(KAFKA_CONFIG_PREFIX + ".metadata.broker.list", brokerList);

        if (uri.getPath() == null || uri.getPath().length() <= 1)
            throw new KafkaException("no topic specified in kafka uri");

        topic = uri.getPath().substring(1); // ignore the initial '/' in the path
        job.set(KAFKA_CONFIG_PREFIX + ".topic", topic);
        log.info(String.format("using kafka broker %s (topic %s)", brokerList, topic));
    } else
        throw new KafkaException("missing scheme from kafka uri (must be kafka://)");

    Producer<Object, byte[]> producer = new Producer<Object, byte[]>(new ProducerConfig(props));
    return new KafkaRecordWriter<K, V>(producer, topic, queueBytes);
}
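Both KafkaOutputFormat variants derive the broker list and topic from the job's output URL, so a driver only needs to point the output path at a kafka:// URI. The sketch below (to be placed in a driver's run() method) assumes KafkaOutputFormat exposes a static setOutputPath(Job, Path) helper mirroring FileOutputFormat's; the broker address and topic are placeholders.

    // Driver sketch for kafka.bridge.hadoop2.KafkaOutputFormat.
    // Assumption: KafkaOutputFormat.setOutputPath(Job, Path) exists, as it does
    // for FileOutputFormat; broker address and topic below are placeholders.
    Job job = Job.getInstance(new Configuration(), "kafka-bridge-example");
    job.setOutputFormatClass(KafkaOutputFormat.class);
    // URL format documented in getRecordWriter() above:
    // kafka://<broker host>:<port>[,<broker host>:<port>...]/<topic>
    KafkaOutputFormat.setOutputPath(job, new Path("kafka://kafka-server:9092/foobar"));
    job.setNumReduceTasks(0); // map-only job writing straight to Kafka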
From source file:kogiri.common.hadoop.io.format.map.BloomMapFileOutputFormat.java
License:Apache License
@Override
public RecordWriter<WritableComparable<?>, Writable> getRecordWriter(TaskAttemptContext context)
        throws IOException {
    Configuration conf = context.getConfiguration();
    CompressionCodec codec = null;
    CompressionType compressionType = CompressionType.NONE;
    if (getCompressOutput(context)) {
        // find the kind of compression to do
        compressionType = SequenceFileOutputFormat.getOutputCompressionType(context);

        // find the right codec
        Class<?> codecClass = getOutputCompressorClass(context, DefaultCodec.class);
        codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, conf);
    }

    Path file = getDefaultWorkFile(context, "");
    FileSystem fs = file.getFileSystem(conf);
    // ignore the progress parameter, since MapFile is local
    final BloomMapFile.Writer out = new BloomMapFile.Writer(conf, fs, file.toString(),
            context.getOutputKeyClass().asSubclass(WritableComparable.class),
            context.getOutputValueClass().asSubclass(Writable.class), compressionType, codec, context);

    return new RecordWriter<WritableComparable<?>, Writable>() {
        @Override
        public void write(WritableComparable<?> key, Writable value) throws IOException {
            out.append(key, value);
        }

        @Override
        public void close(TaskAttemptContext context) throws IOException {
            out.close();
        }
    };
}
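This output format pulls the compression type, codec, and output key/value classes back out of the task attempt's Configuration. A driver might set them as sketched below (standard MRv2 helpers; job name, key/value classes, and output path are placeholders), placed in the driver's run() method.

    // Driver sketch: the record writer above reads these settings back
    // through context.getConfiguration().
    Job job = Job.getInstance(new Configuration(), "bloom-mapfile-example");
    job.setOutputFormatClass(BloomMapFileOutputFormat.class);
    job.setOutputKeyClass(Text.class);          // must be a WritableComparable
    job.setOutputValueClass(IntWritable.class); // must be a Writable
    FileOutputFormat.setCompressOutput(job, true);
    FileOutputFormat.setOutputCompressorClass(job, DefaultCodec.class);
    SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);
    FileOutputFormat.setOutputPath(job, new Path("/tmp/bloom-out"));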
From source file:kogiri.common.hadoop.io.reader.fasta.FastaRawReadReader.java
License:Open Source License
@Override
public void initialize(InputSplit genericSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {
    FileSplit split = (FileSplit) genericSplit;
    Configuration job = context.getConfiguration();
    this.maxLineLength = job.getInt("mapred.linerecordreader.maxlength", Integer.MAX_VALUE);
    this.start = split.getStart();
    this.end = this.start + split.getLength();
    final Path file = split.getPath();
    this.compressionCodecs = new CompressionCodecFactory(job);
    final CompressionCodec codec = this.compressionCodecs.getCodec(file);

    this.filename = file.getName();
    this.firstRead = true;

    // open the file and seek to the start of the split
    FileSystem fs = file.getFileSystem(job);

    // get uncompressed length
    if (codec instanceof GzipCodec) {
        this.isCompressed = true;

        FSDataInputStream fileInCheckSize = fs.open(file);
        byte[] len = new byte[4];
        try {
            LOG.info("compressed input : " + file.getName());
            LOG.info("compressed file size : " + this.end);
            fileInCheckSize.skip(this.end - 4);
            IOUtils.readFully(fileInCheckSize, len, 0, len.length);
            this.uncompressedSize = (len[3] << 24) | (len[2] << 16) | (len[1] << 8) | len[0];
            if (this.uncompressedSize < 0) {
                this.uncompressedSize = this.end;
            }
            LOG.info("uncompressed file size : " + this.uncompressedSize);
        } finally {
            fileInCheckSize.close();
        }

        this.end = Long.MAX_VALUE;
    } else if (codec != null) {
        this.isCompressed = true;
        this.end = Long.MAX_VALUE;
        this.uncompressedSize = Long.MAX_VALUE;
    } else {
        this.isCompressed = false;
    }

    // get inputstream
    FSDataInputStream fileIn = fs.open(file);

    if (codec != null) {
        this.in = new LineReader(codec.createInputStream(fileIn), job);
    } else {
        if (this.start != 0) {
            fileIn.seek(this.start);
        }
        this.in = new LineReader(fileIn, job);
    }

    // skip lines until we meet new read start
    while (this.start < this.end) {
        Text skipText = new Text();
        long newSize = this.in.readLine(skipText, this.maxLineLength,
                Math.max((int) Math.min(Integer.MAX_VALUE, this.end - this.start), this.maxLineLength));
        if (newSize == 0) {
            // EOF
            this.hasNextRead = false;
            this.pos = this.end;
            break;
        }

        if (skipText.getLength() > 0 && skipText.charAt(0) == READ_DELIMITER) {
            this.prevLine = skipText;
            this.prevSize = newSize;
            this.hasNextRead = true;
            this.pos = this.start;
            break;
        }

        this.start += newSize;

        if (this.start >= this.end) {
            // EOF
            this.hasNextRead = false;
            this.pos = this.end;
            break;
        }
    }

    this.key = null;
    this.value = null;
}
From source file:kogiri.common.hadoop.io.reader.fasta.FastaReadDescriptionReader.java
License:Open Source License
@Override
public void initialize(InputSplit genericSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {
    FileSplit split = (FileSplit) genericSplit;
    Configuration job = context.getConfiguration();
    this.maxLineLength = job.getInt("mapred.linerecordreader.maxlength", Integer.MAX_VALUE);
    this.start = split.getStart();
    this.end = this.start + split.getLength();
    final Path file = split.getPath();
    this.compressionCodecs = new CompressionCodecFactory(job);
    final CompressionCodec codec = this.compressionCodecs.getCodec(file);

    this.filename = file.getName();
    this.firstRead = true;

    // open the file and seek to the start of the split
    FileSystem fs = file.getFileSystem(job);

    // get uncompressed length
    if (codec instanceof GzipCodec) {
        this.isCompressed = true;

        FSDataInputStream fileInCheckSize = fs.open(file);
        byte[] len = new byte[4];
        try {
            LOG.info("compressed input : " + file.getName());
            LOG.info("compressed file size : " + this.end);
            fileInCheckSize.skip(this.end - 4);
            IOUtils.readFully(fileInCheckSize, len, 0, len.length);
            this.uncompressedSize = (len[3] << 24) | (len[2] << 16) | (len[1] << 8) | len[0];
            if (this.uncompressedSize < 0) {
                this.uncompressedSize = this.end;
            }
            LOG.info("uncompressed file size : " + this.uncompressedSize);
        } finally {
            fileInCheckSize.close();
        }

        this.end = Long.MAX_VALUE;
    } else if (codec != null) {
        this.isCompressed = true;
        this.end = Long.MAX_VALUE;
        this.uncompressedSize = Long.MAX_VALUE;
    } else {
        this.isCompressed = false;
    }

    // get inputstream
    FSDataInputStream fileIn = fs.open(file);

    if (codec != null) {
        this.in = new LineReader(codec.createInputStream(fileIn), job);
    } else {
        if (this.start != 0) {
            fileIn.seek(this.start);
        }
        this.in = new LineReader(fileIn, job);
    }

    // skip lines until we meet new record start
    while (this.start < this.end) {
        Text skipText = new Text();
        long newSize = this.in.readLine(skipText, this.maxLineLength,
                Math.max((int) Math.min(Integer.MAX_VALUE, this.end - this.start), this.maxLineLength));
        if (newSize == 0) {
            // EOF
            this.hasNextRecord = false;
            this.pos = this.end;
            break;
        }

        if (skipText.getLength() > 0 && skipText.charAt(0) == READ_DELIMITER) {
            this.prevLine = skipText;
            this.prevSize = newSize;
            this.hasNextRecord = true;
            this.pos = this.start;
            break;
        }

        this.start += newSize;

        if (this.start >= this.end) {
            // EOF
            this.hasNextRecord = false;
            this.pos = this.end;
            break;
        }
    }

    this.key = null;
    this.value = null;
}
From source file:kogiri.mapreduce.common.kmermatch.KmerJoiner.java
License:Open Source License
public KmerJoiner(Path[] kmerIndexPath, KmerRangePartition partition, AKmerIndexRecordFilter[] filter,
        TaskAttemptContext context) throws IOException {
    initialize(kmerIndexPath, partition, filter, context.getConfiguration());
}
From source file:kogiri.mapreduce.common.kmermatch.KmerMatchRecordReader.java
License:Open Source License
@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
    if (!(split instanceof KmerMatchInputSplit)) {
        throw new IOException("split is not an instance of KmerMatchIndexSplit");
    }

    KmerMatchInputSplit kmerIndexSplit = (KmerMatchInputSplit) split;
    this.conf = context.getConfiguration();
    this.inputIndexPath = kmerIndexSplit.getIndexFilePath();

    KmerRangePartition partition = kmerIndexSplit.getPartition();

    KmerMatchInputFormatConfig inputFormatConfig = KmerMatchInputFormatConfig.createInstance(this.conf);

    AKmerIndexRecordFilter[] kmerIndexRecordFilter = new AKmerIndexRecordFilter[this.inputIndexPath.length];
    for (int i = 0; i < this.inputIndexPath.length; i++) {
        String fastaFilename = KmerIndexHelper.getFastaFileName(this.inputIndexPath[i].getName());
        Path statisticsFile = new Path(inputFormatConfig.getKmerStatisticsPath(),
                KmerStatisticsHelper.makeKmerStatisticsFileName(fastaFilename));
        FileSystem fs = statisticsFile.getFileSystem(this.conf);
        KmerStatistics statistics = KmerStatistics.createInstance(fs, statisticsFile);

        KmerStandardDeviation stddev = new KmerStandardDeviation();
        stddev.setAverage(statistics.getAverageFrequency());
        stddev.setStdDeviation(statistics.getStdDeviation());
        stddev.setFactor(inputFormatConfig.getStandardDeviationFactor());

        kmerIndexRecordFilter[i] = new STDKmerIndexRecordFilter(stddev);
    }

    this.joiner = new KmerJoiner(this.inputIndexPath, partition, kmerIndexRecordFilter, context);
}
From source file:kogiri.mapreduce.preprocess.common.kmerindex.KmerIndexRecordReader.java
License:Open Source License
@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
    if (!(split instanceof KmerIndexSplit)) {
        throw new IOException("split is not an instance of KmerIndexSplit");
    }

    KmerIndexSplit kmerIndexSplit = (KmerIndexSplit) split;
    this.conf = context.getConfiguration();
    this.inputIndexPaths = kmerIndexSplit.getIndexFilePaths();

    this.inputFormatConfig = KmerIndexInputFormatConfig.createInstance(this.conf);

    FileSystem fs = this.inputIndexPaths[0].getFileSystem(this.conf);
    this.indexReader = new KmerIndexReader(fs, new Path(this.inputFormatConfig.getKmerIndexIndexPath()),
            this.conf);

    this.currentProgress = BigInteger.ZERO;
    StringBuilder endKmer = new StringBuilder();
    for (int i = 0; i < this.inputFormatConfig.getKmerSize(); i++) {
        endKmer.append("T");
    }
    this.progressEnd = SequenceHelper.convertToBigInteger(endKmer.toString());

    this.curKey = null;
    this.curVal = null;
}
From source file:libra.common.hadoop.io.reader.fasta.FastaKmerReader.java
License:Apache License
@Override
public void initialize(InputSplit genericSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {
    FileSplit split = (FileSplit) genericSplit;
    Configuration conf = context.getConfiguration();
    this.kmersize = FastaKmerInputFormat.getKmerSize(conf);
    this.maxLineLength = conf.getInt("mapred.linerecordreader.maxlength", Integer.MAX_VALUE);
    this.start = split.getStart();
    this.end = this.start + split.getLength();
    final Path file = split.getPath();
    this.compressionCodecs = new CompressionCodecFactory(conf);
    final CompressionCodec codec = this.compressionCodecs.getCodec(file);

    // open the file and seek to the start of the split
    FileSystem fs = file.getFileSystem(conf);

    // get uncompressed length
    if (codec instanceof GzipCodec) {
        this.isCompressed = true;

        FSDataInputStream fileInCheckSize = fs.open(file);
        byte[] len = new byte[4];
        try {
            LOG.info("compressed input : " + file.getName());
            LOG.info("compressed file size : " + this.end);
            fileInCheckSize.skip(this.end - 4);
            IOUtils.readFully(fileInCheckSize, len, 0, len.length);
            this.uncompressedSize = (len[3] << 24) | (len[2] << 16) | (len[1] << 8) | len[0];
            if (this.uncompressedSize < 0) {
                this.uncompressedSize = this.end;
            }
            LOG.info("uncompressed file size : " + this.uncompressedSize);
        } finally {
            fileInCheckSize.close();
        }

        this.end = Long.MAX_VALUE;
    } else if (codec != null) {
        this.isCompressed = true;
        this.end = Long.MAX_VALUE;
        this.uncompressedSize = Long.MAX_VALUE;
    } else {
        this.isCompressed = false;
    }

    // get inputstream
    FSDataInputStream fileIn = fs.open(file);

    boolean inTheMiddle = false;
    if (codec != null) {
        this.in = new LineReader(codec.createInputStream(fileIn), conf);
    } else {
        if (this.start != 0) {
            this.start--;
            fileIn.seek(this.start);

            inTheMiddle = true;
        }

        this.in = new LineReader(fileIn, conf);
    }

    this.buffer = new Text();

    if (inTheMiddle) {
        // find new start line
        this.start += this.in.readLine(new Text(), 0,
                (int) Math.min((long) Integer.MAX_VALUE, this.end - this.start));

        // back off
        FSDataInputStream fileIn2 = fs.open(file);
        fileIn2.seek(this.start - 1000);

        LineReader in2 = new LineReader(fileIn2, conf);
        Text tempLine = new Text();
        long curpos = this.start - 1000;
        while (curpos < this.start) {
            curpos += in2.readLine(tempLine, 0, (int) (this.start - curpos));
        }

        if (tempLine.charAt(0) == READ_DELIMITER) {
            // clean start
            this.buffer.clear();
        } else {
            // leave k-1 seq in the buffer
            String seq = tempLine.toString().trim();
            String left = seq.substring(seq.length() - this.kmersize + 1);
            this.buffer.set(left);
        }

        in2.close();
    }

    this.pos = this.start;

    this.key = null;
    this.value = null;
}