List of usage examples for org.apache.hadoop.mapreduce.TaskAttemptContext#getConfiguration()
public Configuration getConfiguration();
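TaskAttemptContext.getConfiguration() returns the job Configuration for the current task attempt; the examples below call it from getRecordWriter() and initialize() to read job settings. As a minimal sketch, the same call is also available from a Mapper's Context (which is a TaskAttemptContext); the property name example.delimiter and the class below are hypothetical.

    import java.io.IOException;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Mapper;

    // Hypothetical mapper: reads a job-level setting through the task attempt context.
    public class DelimiterMapper extends Mapper<LongWritable, Text, Text, LongWritable> {
        private String delimiter;

        @Override
        protected void setup(Context context) throws IOException, InterruptedException {
            Configuration conf = context.getConfiguration();
            delimiter = conf.get("example.delimiter", "\t"); // job setting with a default
        }

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            for (String field : value.toString().split(delimiter)) {
                context.write(new Text(field), new LongWritable(1));
            }
        }
    }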
From source file:jp.ac.u.tokyo.m.pig.udf.store.FreeEncodingPigTextOutputFormat.java
License:Apache License
@SuppressWarnings("rawtypes")
@Override
public RecordWriter<WritableComparable, Tuple> getRecordWriter(TaskAttemptContext job)
        throws IOException, InterruptedException {
    Configuration conf = job.getConfiguration();
    boolean isCompressed = getCompressOutput(job);
    CompressionCodec codec = null;
    String extension = "";
    if (isCompressed) {
        Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(job, GzipCodec.class);
        codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, conf);
        extension = codec.getDefaultExtension();
    }
    Path file = getDefaultWorkFile(job, extension);
    FileSystem fs = file.getFileSystem(conf);
    if (!isCompressed) {
        FSDataOutputStream fileOut = fs.create(file, false);
        return new FreeEncodingPigLineRecordWriter(fileOut, mFieldDelimiter, mEncoding);
    } else {
        FSDataOutputStream fileOut = fs.create(file, false);
        return new FreeEncodingPigLineRecordWriter(
                new DataOutputStream(codec.createOutputStream(fileOut)), mFieldDelimiter, mEncoding);
    }
}
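The writer above reads the job Configuration to decide whether to compress the output and which codec to use. A driver could enable this through the standard FileOutputFormat helpers; the sketch below (job name and output path are placeholders) belongs inside a driver's run() or main() method.

    // Standard MRv2 driver settings that getCompressOutput()/getOutputCompressorClass()
    // in the record writer above will read back via job.getConfiguration().
    Job job = Job.getInstance(new Configuration(), "store-example");
    FileOutputFormat.setCompressOutput(job, true);
    FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
    FileOutputFormat.setOutputPath(job, new Path("/tmp/out"));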
From source file:kafka.bridge.hadoop.KafkaOutputFormat.java
License:Apache License
@Override
public RecordWriter<NullWritable, W> getRecordWriter(TaskAttemptContext context)
        throws IOException, InterruptedException {
    Path outputPath = getOutputPath(context);
    if (outputPath == null)
        throw new IllegalArgumentException("no kafka output url specified");
    URI uri = outputPath.toUri();
    Configuration job = context.getConfiguration();

    final String topic = uri.getPath().substring(1); // ignore the initial '/' in the path
    final int queueSize = job.getInt("kafka.output.queue_size", KAFKA_QUEUE_SIZE);
    final int timeout = job.getInt("kafka.output.connect_timeout", KAFKA_PRODUCER_CONNECT_TIMEOUT);
    final int interval = job.getInt("kafka.output.reconnect_interval", KAFKA_PRODUCER_RECONNECT_INTERVAL);
    final int bufSize = job.getInt("kafka.output.bufsize", KAFKA_PRODUCER_BUFFER_SIZE);
    final int maxSize = job.getInt("kafka.output.max_msgsize", KAFKA_PRODUCER_MAX_MESSAGE_SIZE);

    job.set("kafka.output.server", String.format("%s:%d", uri.getHost(), uri.getPort()));
    job.set("kafka.output.topic", topic);
    job.setInt("kafka.output.queue_size", queueSize);
    job.setInt("kafka.output.connect_timeout", timeout);
    job.setInt("kafka.output.reconnect_interval", interval);
    job.setInt("kafka.output.bufsize", bufSize);
    job.setInt("kafka.output.max_msgsize", maxSize);

    if (uri.getHost().isEmpty())
        throw new IllegalArgumentException("missing kafka server");
    if (uri.getPath().isEmpty())
        throw new IllegalArgumentException("missing kafka topic");

    Properties props = new Properties();
    props.setProperty("host", uri.getHost());
    props.setProperty("port", Integer.toString(uri.getPort()));
    props.setProperty("buffer.size", Integer.toString(bufSize));
    props.setProperty("connect.timeout.ms", Integer.toString(timeout));
    props.setProperty("reconnect.interval", Integer.toString(interval));
    props.setProperty("max.message.size", Integer.toString(maxSize));

    SyncProducer producer = new SyncProducer(new SyncProducerConfig(props));
    return new KafkaRecordWriter<W>(producer, topic, queueSize);
}
From source file:kafka.bridge.hadoop2.KafkaOutputFormat.java
License:Apache License
@Override
public RecordWriter<K, V> getRecordWriter(TaskAttemptContext context)
        throws IOException, InterruptedException {
    Path outputPath = getOutputPath(context);
    if (outputPath == null)
        throw new KafkaException("no kafka output url specified");
    URI uri = URI.create(outputPath.toString());
    Configuration job = context.getConfiguration();
    Properties props = new Properties();
    String topic;

    props.putAll(kafkaConfigMap); // inject default configuration
    for (Map.Entry<String, String> m : job) { // handle any overrides
        if (!m.getKey().startsWith(KAFKA_CONFIG_PREFIX))
            continue;
        if (m.getKey().equals(KAFKA_URL))
            continue;

        String kafkaKeyName = m.getKey().substring(KAFKA_CONFIG_PREFIX.length() + 1);
        props.setProperty(kafkaKeyName, m.getValue()); // set Kafka producer property
    }

    // inject Kafka producer props back into jobconf for easier debugging
    for (Map.Entry<Object, Object> m : props.entrySet()) {
        job.set(KAFKA_CONFIG_PREFIX + "." + m.getKey().toString(), m.getValue().toString());
    }

    // KafkaOutputFormat specific parameters
    final int queueBytes = job.getInt(KAFKA_CONFIG_PREFIX + ".queue.bytes", KAFKA_QUEUE_BYTES);

    if (uri.getScheme().equals("kafka")) {
        // using the direct broker list
        // URL: kafka://<kafka host>/<topic>
        // e.g. kafka://kafka-server:9000,kafka-server2:9000/foobar
        String brokerList = uri.getAuthority();
        props.setProperty("metadata.broker.list", brokerList);
        job.set(KAFKA_CONFIG_PREFIX + ".metadata.broker.list", brokerList);

        if (uri.getPath() == null || uri.getPath().length() <= 1)
            throw new KafkaException("no topic specified in kafka uri");

        topic = uri.getPath().substring(1); // ignore the initial '/' in the path
        job.set(KAFKA_CONFIG_PREFIX + ".topic", topic);
        log.info(String.format("using kafka broker %s (topic %s)", brokerList, topic));
    } else
        throw new KafkaException("missing scheme from kafka uri (must be kafka://)");

    Producer<Object, byte[]> producer = new Producer<Object, byte[]>(new ProducerConfig(props));
    return new KafkaRecordWriter<K, V>(producer, topic, queueBytes);
}
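Both KafkaOutputFormat variants derive the broker list and topic from the job's output URL, so a driver only needs to point the output path at a kafka:// URI. The sketch below (to be placed in a driver's run() method) assumes KafkaOutputFormat exposes a static setOutputPath(Job, Path) helper mirroring FileOutputFormat's; the broker address and topic are placeholders.

    // Driver sketch for kafka.bridge.hadoop2.KafkaOutputFormat.
    // Assumption: KafkaOutputFormat.setOutputPath(Job, Path) exists, as it does
    // for FileOutputFormat; broker address and topic below are placeholders.
    Job job = Job.getInstance(new Configuration(), "kafka-bridge-example");
    job.setOutputFormatClass(KafkaOutputFormat.class);
    // URL format documented in getRecordWriter() above:
    // kafka://<broker host>:<port>[,<broker host>:<port>...]/<topic>
    KafkaOutputFormat.setOutputPath(job, new Path("kafka://kafka-server:9092/foobar"));
    job.setNumReduceTasks(0); // map-only job writing straight to Kafka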
From source file:kogiri.common.hadoop.io.format.map.BloomMapFileOutputFormat.java
License:Apache License
@Override
public RecordWriter<WritableComparable<?>, Writable> getRecordWriter(TaskAttemptContext context)
        throws IOException {
    Configuration conf = context.getConfiguration();
    CompressionCodec codec = null;
    CompressionType compressionType = CompressionType.NONE;
    if (getCompressOutput(context)) {
        // find the kind of compression to do
        compressionType = SequenceFileOutputFormat.getOutputCompressionType(context);

        // find the right codec
        Class<?> codecClass = getOutputCompressorClass(context, DefaultCodec.class);
        codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, conf);
    }

    Path file = getDefaultWorkFile(context, "");
    FileSystem fs = file.getFileSystem(conf);
    // ignore the progress parameter, since MapFile is local
    final BloomMapFile.Writer out = new BloomMapFile.Writer(conf, fs, file.toString(),
            context.getOutputKeyClass().asSubclass(WritableComparable.class),
            context.getOutputValueClass().asSubclass(Writable.class), compressionType, codec, context);

    return new RecordWriter<WritableComparable<?>, Writable>() {
        @Override
        public void write(WritableComparable<?> key, Writable value) throws IOException {
            out.append(key, value);
        }

        @Override
        public void close(TaskAttemptContext context) throws IOException {
            out.close();
        }
    };
}
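This output format pulls the compression type, codec, and output key/value classes back out of the task attempt's Configuration. A driver might set them as sketched below (standard MRv2 helpers; job name, key/value classes, and output path are placeholders), placed in the driver's run() method.

    // Driver sketch: the record writer above reads these settings back
    // through context.getConfiguration().
    Job job = Job.getInstance(new Configuration(), "bloom-mapfile-example");
    job.setOutputFormatClass(BloomMapFileOutputFormat.class);
    job.setOutputKeyClass(Text.class);          // must be a WritableComparable
    job.setOutputValueClass(IntWritable.class); // must be a Writable
    FileOutputFormat.setCompressOutput(job, true);
    FileOutputFormat.setOutputCompressorClass(job, DefaultCodec.class);
    SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);
    FileOutputFormat.setOutputPath(job, new Path("/tmp/bloom-out"));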
From source file:kogiri.common.hadoop.io.reader.fasta.FastaRawReadReader.java
License:Open Source License
@Override
public void initialize(InputSplit genericSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {
    FileSplit split = (FileSplit) genericSplit;
    Configuration job = context.getConfiguration();
    this.maxLineLength = job.getInt("mapred.linerecordreader.maxlength", Integer.MAX_VALUE);
    this.start = split.getStart();
    this.end = this.start + split.getLength();
    final Path file = split.getPath();
    this.compressionCodecs = new CompressionCodecFactory(job);
    final CompressionCodec codec = this.compressionCodecs.getCodec(file);

    this.filename = file.getName();
    this.firstRead = true;

    // open the file and seek to the start of the split
    FileSystem fs = file.getFileSystem(job);

    // get uncompressed length
    if (codec instanceof GzipCodec) {
        this.isCompressed = true;

        FSDataInputStream fileInCheckSize = fs.open(file);
        byte[] len = new byte[4];
        try {
            LOG.info("compressed input : " + file.getName());
            LOG.info("compressed file size : " + this.end);
            fileInCheckSize.skip(this.end - 4);
            IOUtils.readFully(fileInCheckSize, len, 0, len.length);
            this.uncompressedSize = (len[3] << 24) | (len[2] << 16) | (len[1] << 8) | len[0];
            if (this.uncompressedSize < 0) {
                this.uncompressedSize = this.end;
            }
            LOG.info("uncompressed file size : " + this.uncompressedSize);
        } finally {
            fileInCheckSize.close();
        }

        this.end = Long.MAX_VALUE;
    } else if (codec != null) {
        this.isCompressed = true;
        this.end = Long.MAX_VALUE;
        this.uncompressedSize = Long.MAX_VALUE;
    } else {
        this.isCompressed = false;
    }

    // get inputstream
    FSDataInputStream fileIn = fs.open(file);

    if (codec != null) {
        this.in = new LineReader(codec.createInputStream(fileIn), job);
    } else {
        if (this.start != 0) {
            fileIn.seek(this.start);
        }
        this.in = new LineReader(fileIn, job);
    }

    // skip lines until we meet new read start
    while (this.start < this.end) {
        Text skipText = new Text();
        long newSize = this.in.readLine(skipText, this.maxLineLength,
                Math.max((int) Math.min(Integer.MAX_VALUE, this.end - this.start), this.maxLineLength));
        if (newSize == 0) {
            // EOF
            this.hasNextRead = false;
            this.pos = this.end;
            break;
        }

        if (skipText.getLength() > 0 && skipText.charAt(0) == READ_DELIMITER) {
            this.prevLine = skipText;
            this.prevSize = newSize;
            this.hasNextRead = true;
            this.pos = this.start;
            break;
        }

        this.start += newSize;

        if (this.start >= this.end) {
            // EOF
            this.hasNextRead = false;
            this.pos = this.end;
            break;
        }
    }

    this.key = null;
    this.value = null;
}
From source file:kogiri.common.hadoop.io.reader.fasta.FastaReadDescriptionReader.java
License:Open Source License
@Override
public void initialize(InputSplit genericSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {
    FileSplit split = (FileSplit) genericSplit;
    Configuration job = context.getConfiguration();
    this.maxLineLength = job.getInt("mapred.linerecordreader.maxlength", Integer.MAX_VALUE);
    this.start = split.getStart();
    this.end = this.start + split.getLength();
    final Path file = split.getPath();
    this.compressionCodecs = new CompressionCodecFactory(job);
    final CompressionCodec codec = this.compressionCodecs.getCodec(file);

    this.filename = file.getName();
    this.firstRead = true;

    // open the file and seek to the start of the split
    FileSystem fs = file.getFileSystem(job);

    // get uncompressed length
    if (codec instanceof GzipCodec) {
        this.isCompressed = true;

        FSDataInputStream fileInCheckSize = fs.open(file);
        byte[] len = new byte[4];
        try {
            LOG.info("compressed input : " + file.getName());
            LOG.info("compressed file size : " + this.end);
            fileInCheckSize.skip(this.end - 4);
            IOUtils.readFully(fileInCheckSize, len, 0, len.length);
            this.uncompressedSize = (len[3] << 24) | (len[2] << 16) | (len[1] << 8) | len[0];
            if (this.uncompressedSize < 0) {
                this.uncompressedSize = this.end;
            }
            LOG.info("uncompressed file size : " + this.uncompressedSize);
        } finally {
            fileInCheckSize.close();
        }

        this.end = Long.MAX_VALUE;
    } else if (codec != null) {
        this.isCompressed = true;
        this.end = Long.MAX_VALUE;
        this.uncompressedSize = Long.MAX_VALUE;
    } else {
        this.isCompressed = false;
    }

    // get inputstream
    FSDataInputStream fileIn = fs.open(file);

    if (codec != null) {
        this.in = new LineReader(codec.createInputStream(fileIn), job);
    } else {
        if (this.start != 0) {
            fileIn.seek(this.start);
        }
        this.in = new LineReader(fileIn, job);
    }

    // skip lines until we meet new record start
    while (this.start < this.end) {
        Text skipText = new Text();
        long newSize = this.in.readLine(skipText, this.maxLineLength,
                Math.max((int) Math.min(Integer.MAX_VALUE, this.end - this.start), this.maxLineLength));
        if (newSize == 0) {
            // EOF
            this.hasNextRecord = false;
            this.pos = this.end;
            break;
        }

        if (skipText.getLength() > 0 && skipText.charAt(0) == READ_DELIMITER) {
            this.prevLine = skipText;
            this.prevSize = newSize;
            this.hasNextRecord = true;
            this.pos = this.start;
            break;
        }

        this.start += newSize;

        if (this.start >= this.end) {
            // EOF
            this.hasNextRecord = false;
            this.pos = this.end;
            break;
        }
    }

    this.key = null;
    this.value = null;
}
From source file:kogiri.mapreduce.common.kmermatch.KmerJoiner.java
License:Open Source License
public KmerJoiner(Path[] kmerIndexPath, KmerRangePartition partition, AKmerIndexRecordFilter[] filter,
        TaskAttemptContext context) throws IOException {
    initialize(kmerIndexPath, partition, filter, context.getConfiguration());
}
From source file:kogiri.mapreduce.common.kmermatch.KmerMatchRecordReader.java
License:Open Source License
@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
    if (!(split instanceof KmerMatchInputSplit)) {
        throw new IOException("split is not an instance of KmerMatchIndexSplit");
    }

    KmerMatchInputSplit kmerIndexSplit = (KmerMatchInputSplit) split;
    this.conf = context.getConfiguration();
    this.inputIndexPath = kmerIndexSplit.getIndexFilePath();

    KmerRangePartition partition = kmerIndexSplit.getPartition();

    KmerMatchInputFormatConfig inputFormatConfig = KmerMatchInputFormatConfig.createInstance(this.conf);

    AKmerIndexRecordFilter[] kmerIndexRecordFilter = new AKmerIndexRecordFilter[this.inputIndexPath.length];
    for (int i = 0; i < this.inputIndexPath.length; i++) {
        String fastaFilename = KmerIndexHelper.getFastaFileName(this.inputIndexPath[i].getName());
        Path statisticsFile = new Path(inputFormatConfig.getKmerStatisticsPath(),
                KmerStatisticsHelper.makeKmerStatisticsFileName(fastaFilename));
        FileSystem fs = statisticsFile.getFileSystem(this.conf);
        KmerStatistics statistics = KmerStatistics.createInstance(fs, statisticsFile);

        KmerStandardDeviation stddev = new KmerStandardDeviation();
        stddev.setAverage(statistics.getAverageFrequency());
        stddev.setStdDeviation(statistics.getStdDeviation());
        stddev.setFactor(inputFormatConfig.getStandardDeviationFactor());

        kmerIndexRecordFilter[i] = new STDKmerIndexRecordFilter(stddev);
    }

    this.joiner = new KmerJoiner(this.inputIndexPath, partition, kmerIndexRecordFilter, context);
}
From source file:kogiri.mapreduce.preprocess.common.kmerindex.KmerIndexRecordReader.java
License:Open Source License
@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
    if (!(split instanceof KmerIndexSplit)) {
        throw new IOException("split is not an instance of KmerIndexSplit");
    }

    KmerIndexSplit kmerIndexSplit = (KmerIndexSplit) split;
    this.conf = context.getConfiguration();
    this.inputIndexPaths = kmerIndexSplit.getIndexFilePaths();

    this.inputFormatConfig = KmerIndexInputFormatConfig.createInstance(this.conf);

    FileSystem fs = this.inputIndexPaths[0].getFileSystem(this.conf);
    this.indexReader = new KmerIndexReader(fs, new Path(this.inputFormatConfig.getKmerIndexIndexPath()),
            this.conf);

    this.currentProgress = BigInteger.ZERO;
    StringBuilder endKmer = new StringBuilder();
    for (int i = 0; i < this.inputFormatConfig.getKmerSize(); i++) {
        endKmer.append("T");
    }
    this.progressEnd = SequenceHelper.convertToBigInteger(endKmer.toString());

    this.curKey = null;
    this.curVal = null;
}
From source file:libra.common.hadoop.io.reader.fasta.FastaKmerReader.java
License:Apache License
@Override
public void initialize(InputSplit genericSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {
    FileSplit split = (FileSplit) genericSplit;
    Configuration conf = context.getConfiguration();
    this.kmersize = FastaKmerInputFormat.getKmerSize(conf);
    this.maxLineLength = conf.getInt("mapred.linerecordreader.maxlength", Integer.MAX_VALUE);
    this.start = split.getStart();
    this.end = this.start + split.getLength();
    final Path file = split.getPath();
    this.compressionCodecs = new CompressionCodecFactory(conf);
    final CompressionCodec codec = this.compressionCodecs.getCodec(file);

    // open the file and seek to the start of the split
    FileSystem fs = file.getFileSystem(conf);

    // get uncompressed length
    if (codec instanceof GzipCodec) {
        this.isCompressed = true;

        FSDataInputStream fileInCheckSize = fs.open(file);
        byte[] len = new byte[4];
        try {
            LOG.info("compressed input : " + file.getName());
            LOG.info("compressed file size : " + this.end);
            fileInCheckSize.skip(this.end - 4);
            IOUtils.readFully(fileInCheckSize, len, 0, len.length);
            this.uncompressedSize = (len[3] << 24) | (len[2] << 16) | (len[1] << 8) | len[0];
            if (this.uncompressedSize < 0) {
                this.uncompressedSize = this.end;
            }
            LOG.info("uncompressed file size : " + this.uncompressedSize);
        } finally {
            fileInCheckSize.close();
        }

        this.end = Long.MAX_VALUE;
    } else if (codec != null) {
        this.isCompressed = true;
        this.end = Long.MAX_VALUE;
        this.uncompressedSize = Long.MAX_VALUE;
    } else {
        this.isCompressed = false;
    }

    // get inputstream
    FSDataInputStream fileIn = fs.open(file);

    boolean inTheMiddle = false;
    if (codec != null) {
        this.in = new LineReader(codec.createInputStream(fileIn), conf);
    } else {
        if (this.start != 0) {
            this.start--;
            fileIn.seek(this.start);

            inTheMiddle = true;
        }

        this.in = new LineReader(fileIn, conf);
    }

    this.buffer = new Text();

    if (inTheMiddle) {
        // find new start line
        this.start += this.in.readLine(new Text(), 0,
                (int) Math.min((long) Integer.MAX_VALUE, this.end - this.start));

        // back off
        FSDataInputStream fileIn2 = fs.open(file);
        fileIn2.seek(this.start - 1000);

        LineReader in2 = new LineReader(fileIn2, conf);
        Text tempLine = new Text();
        long curpos = this.start - 1000;
        while (curpos < this.start) {
            curpos += in2.readLine(tempLine, 0, (int) (this.start - curpos));
        }

        if (tempLine.charAt(0) == READ_DELIMITER) {
            // clean start
            this.buffer.clear();
        } else {
            // leave k-1 seq in the buffer
            String seq = tempLine.toString().trim();
            String left = seq.substring(seq.length() - this.kmersize + 1);
            this.buffer.set(left);
        }

        in2.close();
    }

    this.pos = this.start;

    this.key = null;
    this.value = null;
}