Usage examples for org.apache.hadoop.mapreduce.TaskAttemptContext.getConfiguration()
/** Signature of the accessor the examples below call on a TaskAttemptContext; its return type is Configuration. */
public Configuration getConfiguration();
From source file:de.gesundkrank.wikipedia.hadoop.util.MapFileOutputFormat.java
License:Open Source License
/**
 * Opens a {@link MapFile.Writer} on the committer's work path and wraps it in a
 * {@link RecordWriter} that appends every key/value pair it receives.
 *
 * @param context the task attempt context; supplies the configuration and the output committer
 * @return a record writer that forwards writes to the underlying map file writer
 * @throws IOException if the work path cannot be resolved or the map file writer cannot be opened
 * @throws InterruptedException declared by the overridden method
 */
@Override
public RecordWriter<WritableComparable, Writable> getRecordWriter(TaskAttemptContext context)
        throws IOException, InterruptedException {
    Configuration conf = context.getConfiguration();
    FileOutputCommitter committer = (FileOutputCommitter) getOutputCommitter(context);

    // Key class, value class and BLOCK compression, gathered for the varargs constructor.
    SequenceFile.Writer.Option[] writerOptions = {
        SequenceFile.Writer.keyClass(WritableComparable.class),
        SequenceFile.Writer.valueClass(Writable.class),
        SequenceFile.Writer.compression(SequenceFile.CompressionType.BLOCK)
    };

    final MapFile.Writer out = new MapFile.Writer(conf, committer.getWorkPath(), writerOptions);

    return new RecordWriter<WritableComparable, Writable>() {
        @Override
        public void write(WritableComparable key, Writable value) throws IOException, InterruptedException {
            out.append(key, value);
        }

        @Override
        public void close(TaskAttemptContext unusedContext) throws IOException, InterruptedException {
            out.close();
        }
    };
}
From source file:de.l3s.common.hadoop.WholeFileRecordReader.java
License:Apache License
/**
 * Stores the split (cast to {@link FileSplit}) and the task's configuration for later use.
 *
 * @param inputSplit the split to read; must be a {@link FileSplit}
 * @param taskAttemptContext the task attempt context supplying the configuration
 * @throws IOException declared by the overridden method
 * @throws InterruptedException declared by the overridden method
 */
@Override
public void initialize(InputSplit inputSplit, TaskAttemptContext taskAttemptContext)
        throws IOException, InterruptedException {
    final FileSplit asFileSplit = (FileSplit) inputSplit;
    this.fileSplit = asFileSplit;
    this.conf = taskAttemptContext.getConfiguration();
}
From source file:de.l3s.streamcorpus.terrier.ThriftFileCollectionRecordReader.java
License:Apache License
/** * Read the urls / paths of files from the input file * *///from www . j a va 2 s .c o m @Override public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException { collectionIndex = -1; conf = context.getConfiguration(); fs = FileSystem.get(conf); loadPathsFromInputSplit(split, conf); }
From source file:de.rwhq.hdfs.index.LineRecordReader.java
License:Apache License
public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException { FileSplit split = (FileSplit) genericSplit; Configuration job = context.getConfiguration(); this.maxLineLength = job.getInt("mapred.linerecordreader.maxlength", Integer.MAX_VALUE); start = split.getStart();/*from w w w . jav a2 s . c o m*/ end = start + split.getLength(); final Path file = split.getPath(); compressionCodecs = new CompressionCodecFactory(job); final CompressionCodec codec = compressionCodecs.getCodec(file); // open the file and seek to the start of the split FileSystem fs = file.getFileSystem(job); fileIn = fs.open(split.getPath()); boolean skipFirstLine = false; if (codec != null) { in = new LineReader(codec.createInputStream(fileIn), job); end = Long.MAX_VALUE; } else { if (start != 0) { skipFirstLine = true; --start; fileIn.seek(start); } in = new LineReader(fileIn, job); } if (skipFirstLine) { // skip first line and re-establish "start". start += in.readLine(new Text(), 0, (int) Math.min((long) Integer.MAX_VALUE, end - start)); } this.pos = start; }
From source file:de.sec.dns.playground.ARFFOutputFormat.java
License:Apache License
/**
 * Creates the task's default work file with an {@code .arff} extension and
 * returns an {@code ARFFLineRecordWriter} that writes to it.
 *
 * @param job the task attempt context; supplies the configuration and the work file location
 * @return a record writer backed by the newly created output stream
 * @throws IOException if the file system cannot be obtained or the file cannot be created
 * @throws InterruptedException declared by the overridden method
 */
@Override
public RecordWriter<K, V> getRecordWriter(TaskAttemptContext job)
        throws IOException, InterruptedException {
    final Configuration conf = job.getConfiguration();
    final Path outputFile = getDefaultWorkFile(job, ".arff");
    final FileSystem fileSystem = outputFile.getFileSystem(conf);

    // The second argument (false) is passed through to FileSystem.create unchanged.
    return new ARFFLineRecordWriter<K, V>(fileSystem.create(outputFile, false), job);
}
From source file:de.tudarmstadt.ukp.dkpro.c4corpus.hadoop.deduplication.DocumentInfoOutputFormat.java
License:Apache License
@Override public RecordWriter<NullWritable, List<DocumentInfo>> getRecordWriter(TaskAttemptContext job) throws IOException { //get the current path Configuration conf = job.getConfiguration(); String extension = ".txt"; //create the full path with the output directory plus our filename Path file = getDefaultWorkFile(job, extension); //create the file in the file system FileSystem fs = file.getFileSystem(conf); FSDataOutputStream fileOut = fs.create(file, false); //create our record writer with the new file return new DocumentInfoRecordWriter(fileOut); }
From source file:dev.geminileft.outputformat.MyTextOutputFormat.java
License:Apache License
public RecordWriter<K, V> getRecordWriter(TaskAttemptContext job) throws IOException, InterruptedException { Configuration conf = job.getConfiguration(); boolean isCompressed = getCompressOutput(job); String keyValueSeparator = conf.get(SEPERATOR, "\t"); String keyValueDelimiter = conf.get(DELIMITER, "\n"); CompressionCodec codec = null;// w w w.j ava2s . c om String extension = ""; if (isCompressed) { Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(job, GzipCodec.class); codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, conf); extension = codec.getDefaultExtension(); } Path file = getDefaultWorkFile(job, extension); FileSystem fs = file.getFileSystem(conf); if (!isCompressed) { FSDataOutputStream fileOut = fs.create(file, false); return new LineRecordWriter<K, V>(fileOut, keyValueSeparator, keyValueDelimiter); } else { FSDataOutputStream fileOut = fs.create(file, false); return new LineRecordWriter<K, V>(new DataOutputStream(codec.createOutputStream(fileOut)), keyValueSeparator, keyValueDelimiter); } }
From source file:diamondmapreduce.NLineRecordReader.java
License:Apache License
@Override public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException, InterruptedException { FileSplit split = (FileSplit) genericSplit; final Path file = split.getPath(); Configuration conf = context.getConfiguration(); this.maxLineLength = conf.getInt("mapreduce.input.linerecordreader.line.maxlength", Integer.MAX_VALUE); FileSystem fs = file.getFileSystem(conf); start = split.getStart();//ww w .j a v a 2 s .c o m end = start + split.getLength(); boolean skipFirstLine = false; FSDataInputStream filein = fs.open(split.getPath()); if (start != 0) { skipFirstLine = true; --start; filein.seek(start); } in = new LineReader(filein, conf); if (skipFirstLine) { start += in.readLine(new Text(), 0, (int) Math.min((long) Integer.MAX_VALUE, end - start)); } this.pos = start; }
From source file:dk.aau.cs.cloudetl.io.SequenceIndexFileReader.java
License:Apache License
@Override public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException { FileSplit fileSplit = (FileSplit) split; conf = context.getConfiguration(); Path path = fileSplit.getPath(); FileSystem fs = path.getFileSystem(conf); this.in = new SequenceFile.Reader(fs, path, conf); this.end = fileSplit.getStart() + fileSplit.getLength(); if (fileSplit.getStart() > in.getPosition()) { in.sync(fileSplit.getStart()); // sync to start }/*from www . j av a2 s.c o m*/ this.start = in.getPosition(); more = start < end; }
From source file:edu.arizona.cs.hadoop.fs.irods.output.HirodsFileOutputCommitter.java
License:Apache License
/** * Create a file output committer/* www .j av a2 s .co m*/ * * @param outputPath the job's output path * @param context the task's context * @throws IOException */ public HirodsFileOutputCommitter(Path outputPath, Path tempPath, TaskAttemptContext context) throws IOException { if (outputPath != null && tempPath != null) { this.outputPath = outputPath; this.outputFileSystem = outputPath.getFileSystem(context.getConfiguration()); this.tempPath = tempPath; this.workFileSystem = tempPath.getFileSystem(context.getConfiguration()); this.workPath = new Path(tempPath, (HirodsFileOutputCommitter.TEMP_DIR_NAME + Path.SEPARATOR + "_" + context.getTaskAttemptID().toString())).makeQualified(this.workFileSystem); } }