Usage examples for org.apache.hadoop.mapreduce.TaskAttemptContext#getConfiguration()
public Configuration getConfiguration();
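The examples below all follow the same pattern: a task-side component (RecordReader, RecordWriter, OutputCommitter) pulls the job's Configuration out of the TaskAttemptContext and reads its settings from it. A minimal standalone sketch of that pattern, assuming Hadoop 2.x+ on the classpath (the key and value are illustrative):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;

public class GetConfigurationDemo {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        conf.set("example.key", "example-value"); // illustrative key/value

        // getConfiguration() hands back the Configuration the context was built with.
        TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
        System.out.println(context.getConfiguration().get("example.key")); // prints "example-value"
    }
}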
From source file:com.panguso.lc.analysis.format.mapreduce.TextOutputFormat.java
License:Open Source License
/**
 * @param job job
 * @throws IOException IOException
 * @throws InterruptedException InterruptedException
 */
public RecordWriter<K, V> getRecordWriter(TaskAttemptContext job) throws IOException, InterruptedException {
    Configuration conf = job.getConfiguration();
    boolean isCompressed = getCompressOutput(job);
    String keyValueSeparator = conf.get("mapred.textoutputformat.separator", separate);
    CompressionCodec codec = null;
    String extension = "";
    if (isCompressed) {
        Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(job, GzipCodec.class);
        codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, conf);
        extension = codec.getDefaultExtension();
    }
    Path file = getDefaultWorkFile(job, extension);
    FileSystem fs = file.getFileSystem(conf);
    if (!isCompressed) {
        FSDataOutputStream fileOut = fs.create(file, false);
        return new LineRecordWriter<K, V>(fileOut, keyValueSeparator);
    } else {
        FSDataOutputStream fileOut = fs.create(file, false);
        return new LineRecordWriter<K, V>(new DataOutputStream(codec.createOutputStream(fileOut)),
                keyValueSeparator);
    }
}
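The separator and compression settings this writer reads at task time can be supplied from the driver. A minimal sketch, assuming the custom TextOutputFormat above is on the classpath (job name and output path are illustrative):

// Driver-side sketch: these values reach the writer via job.getConfiguration().
Configuration conf = new Configuration();
conf.set("mapred.textoutputformat.separator", "\t"); // key read by the writer above
Job job = Job.getInstance(conf, "text-output-example");
job.setOutputFormatClass(TextOutputFormat.class); // the custom class above
FileOutputFormat.setOutputPath(job, new Path("/tmp/example-out")); // illustrative path
FileOutputFormat.setCompressOutput(job, true); // picked up by getCompressOutput(job)
FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class); // picked up by getOutputCompressorClass(...)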
From source file:com.phantom.hadoop.examples.terasort.TeraOutputFormat.java
License:Apache License
public RecordWriter<Text, Text> getRecordWriter(TaskAttemptContext job) throws IOException {
    Path file = getDefaultWorkFile(job, "");
    FileSystem fs = file.getFileSystem(job.getConfiguration());
    FSDataOutputStream fileOut = fs.create(file);
    return new TeraRecordWriter(fileOut, job);
}
From source file:com.pinterest.terrapin.hadoop.HFileOutputFormat.java
License:Apache License
public RecordWriter<BytesWritable, BytesWritable> getRecordWriter(TaskAttemptContext context) throws IOException {
    // Get the path of the temporary output file
    final Path outputPath = FileOutputFormat.getOutputPath(context);
    final Path outputDir = new FileOutputCommitter(outputPath, context).getWorkPath();
    final Configuration conf = context.getConfiguration();
    final FileSystem fs = outputDir.getFileSystem(conf);
    int blockSize = conf.getInt(Constants.HFILE_BLOCKSIZE, 16384);
    // Default to snappy.
    Compression.Algorithm compressionAlgorithm = getAlgorithm(conf.get(Constants.HFILE_COMPRESSION));
    final StoreFile.Writer writer = new StoreFile.WriterBuilder(conf, new CacheConfig(conf), fs, blockSize)
            .withFilePath(hfilePath(outputPath, context.getTaskAttemptID().getTaskID().getId()))
            .withCompression(compressionAlgorithm)
            .build();
    return new HFileRecordWriter(writer);
}
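The block size and compression this writer uses come from two Terrapin configuration keys (Constants.HFILE_BLOCKSIZE and Constants.HFILE_COMPRESSION in the code above). A hedged driver-side sketch; the numeric value and the "SNAPPY" string are illustrative, and the exact compression value format depends on the project's getAlgorithm helper:

// Driver-side sketch: tuning the keys read by getRecordWriter above.
Configuration conf = new Configuration();
conf.setInt(Constants.HFILE_BLOCKSIZE, 32768);   // HFile block size in bytes (illustrative)
conf.set(Constants.HFILE_COMPRESSION, "SNAPPY"); // compression algorithm name (illustrative)
Job job = Job.getInstance(conf, "hfile-generation");
job.setOutputFormatClass(HFileOutputFormat.class);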
From source file:com.pivotal.hawq.mapreduce.ao.HAWQAORecordReader.java
License:Apache License
/**
 * Called once at initialization.
 *
 * @param split
 *            the split that defines the range of records to read
 * @param context
 *            the information about the task
 * @throws IOException
 * @throws InterruptedException
 */
@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
    // initialize the value
    Configuration conf = context.getConfiguration();

    // Extract the parameters needed by HAWQAOFileReader and HAWQAORecord
    String encoding = HAWQConfiguration.getInputTableEncoding(conf);
    HAWQSchema schema = HAWQConfiguration.getInputTableSchema(conf);
    /*
     * GPSQL-1047
     *
     * Get version from configuration and init HAWQAORecord with it
     */
    String version = HAWQConfiguration.getDatabaseVersion(conf);

    filereader = new HAWQAOFileReader(conf, split);

    try {
        value = new HAWQAORecord(schema, encoding, version);
    } catch (HAWQException hawqE) {
        throw new IOException(hawqE.getMessage());
    }
}
From source file:com.pivotal.hawq.mapreduce.HAWQInputFormat.java
License:Apache License
@Override
public RecordReader<Void, HAWQRecord> createRecordReader(InputSplit split, TaskAttemptContext context)
        throws IOException, InterruptedException {
    HAWQTableFormat tableFormat = getTableFormat(context.getConfiguration());
    switch (tableFormat) {
    case AO:
        return aoInputFormat.createRecordReader(split, context);
    case Parquet:
        return parquetInputFormat.createRecordReader(split, context);
    default:
        throw new AssertionError("invalid table format: " + tableFormat);
    }
}
From source file:com.practicalHadoop.outputformat.MultpleDirectories.FileOutputCommitter.java
License:Apache License
/**
 * Create a file output committer
 *
 * @param outputPath the job's output path
 * @param context the task's context
 * @throws IOException
 */
public FileOutputCommitter(Path outputPath, TaskAttemptContext context) throws IOException {
    super(outputPath, context);
    Job job = new Job(context.getConfiguration());
    String outputDirectories = job.getConfiguration().get(MULTIPLE_OUTPUTS, "");
    if (outputDirectories != null) {
        StringTokenizer st = new StringTokenizer(outputDirectories, " ");
        while (st.hasMoreTokens()) {
            pathNames.add(st.nextToken());
        }
    }
    if (outputPath != null) {
        this.outputPath = outputPath;
        outputFileSystem = outputPath.getFileSystem(context.getConfiguration());
        workPath = new Path(outputPath,
                (FileOutputCommitter.TEMP_DIR_NAME + Path.SEPARATOR + "_" + context.getTaskAttemptID().toString()))
                        .makeQualified(outputFileSystem);
        for (String p : pathNames) {
            if (outputPath.toString().endsWith(p)) {
                committers.put(p, this);
                fake = false;
                break;
            }
        }
    }
}
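This committer tokenizes a space-separated list of output directory names from the MULTIPLE_OUTPUTS key (a constant defined by the same class). A hedged driver-side sketch, assuming the constant is publicly accessible and with illustrative directory names:

// Driver-side sketch: registering the directories the committer will manage.
Job job = Job.getInstance(new Configuration(), "multi-dir-output");
job.getConfiguration().set(FileOutputCommitter.MULTIPLE_OUTPUTS, "dirA dirB dirC"); // illustrative names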
From source file:com.rim.logdriver.mapreduce.avro.AvroBlockInputFormat.java
License:Apache License
/**
 * Creates a new AvroBlockRecordReader.
 *
 * Increases the default value of mapreduce.job.max.split.locations to 100000,
 * if it's not already set.
 *
 * Also sets mapred.max.split.size to the default block size for the root
 * directory ("/"), if it's not already set.
 *
 * @param split
 *          The InputSplit.
 * @param context
 *          The TaskAttemptContext.
 * @return A new AvroBlockRecordReader.
 * @throws IOException
 *           If there is an I/O error.
 */
@Override
public RecordReader<AvroFileHeader, BytesWritable> createRecordReader(InputSplit split, TaskAttemptContext context)
        throws IOException {
    Configuration conf = context.getConfiguration();

    // Ensure we have sensible defaults for how we build blocks.
    if (conf.get("mapreduce.job.max.split.locations") == null) {
        conf.setLong("mapreduce.job.max.split.locations", MAX_SPLIT_LOCATIONS);
    }
    if (conf.get("mapred.max.split.size") == null) {
        // Try to set the split size to the default block size. In case of
        // failure, we'll use this 128MB default.
        long blockSize = 128 * 1024 * 1024; // 128MB
        try {
            blockSize = FileSystem.get(conf).getDefaultBlockSize();
        } catch (IOException e) {
            LOG.error("Error getting filesystem to get default block size (this does not bode well).");
        }
        conf.setLong("mapred.max.split.size", blockSize);
    }

    return new AvroBlockRecordReader();
}
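Because the defaults above are only applied when the keys are unset, a job can override them before submission. A hedged sketch with illustrative values:

// Driver-side sketch: pre-setting the two keys the input format would otherwise default.
Configuration conf = new Configuration();
conf.setLong("mapreduce.job.max.split.locations", 100000L);
conf.setLong("mapred.max.split.size", 256L * 1024 * 1024); // 256 MB splits (illustrative)
Job job = Job.getInstance(conf, "avro-block-read");
job.setInputFormatClass(AvroBlockInputFormat.class);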
From source file:com.rw.legion.input.CsvInputFormat.java
License:Apache License
@Override
public RecordReader<NullWritable, LegionRecord> createRecordReader(InputSplit split, TaskAttemptContext context) {
    String delimiter = context.getConfiguration().get("textinputformat.record.delimiter");
    byte[] recordDelimiterBytes = null;
    if (null != delimiter)
        recordDelimiterBytes = delimiter.getBytes(Charsets.UTF_8);
    return new CsvRecordReader(recordDelimiterBytes);
}
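The record delimiter read here (and in the JsonInputFormat example below) is the standard textinputformat.record.delimiter key, so a driver can override it. A minimal sketch with an illustrative delimiter:

// Driver-side sketch: supplying a custom record delimiter to CsvRecordReader.
Configuration conf = new Configuration();
conf.set("textinputformat.record.delimiter", "\n"); // illustrative delimiter
Job job = Job.getInstance(conf, "legion-csv-input");
job.setInputFormatClass(CsvInputFormat.class);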
From source file:com.rw.legion.input.JsonInputFormat.java
License:Apache License
@Override
public RecordReader<NullWritable, LegionRecord> createRecordReader(InputSplit split, TaskAttemptContext context) {
    String delimiter = context.getConfiguration().get("textinputformat.record.delimiter");
    byte[] recordDelimiterBytes = null;
    if (null != delimiter)
        recordDelimiterBytes = delimiter.getBytes(Charsets.UTF_8);
    return new JsonRecordReader(recordDelimiterBytes);
}
From source file:com.rw.legion.input.LegionRecordReader.java
License:Apache License
public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
    /*
     * fileBroken tracks whether there's been an IOException while reading
     * this file. If there has, the record reader will simply stop reading
     * records for this particular file, rather than blowing up the whole
     * job.
     */
    fileBroken = false;
    currentLine = new Text();
    currentLineNumber = 0;

    FileSplit split = (FileSplit) genericSplit;
    if (split.getLength() == 0) {
        fileBroken = true;
    }

    // Load the Legion Objective.
    Configuration job = context.getConfiguration();
    this.maxLineLength = job.getInt(MAX_LINE_LENGTH, Integer.MAX_VALUE);
    legionObjective = ObjectiveDeserializer.deserialize(job.get("legion_objective"));

    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();

    // Open the file and seek to the start of the split
    final FileSystem fs = file.getFileSystem(job);
    fileIn = fs.open(file);

    // Grab the file name to include with the data.
    fileName = file.toString();

    // Does the Legion Objective specify an input codec to use?
    if (legionObjective.getCodecOverride() != null) {
        isCompressedInput = true;
        CompressionCodec codec = new CompressionCodecFactory(job)
                .getCodecByClassName(legionObjective.getCodecOverride());
        decompressor = CodecPool.getDecompressor(codec);
        in = new SplitLineReader(codec.createInputStream(fileIn, decompressor), job, this.recordDelimiterBytes);
        filePosition = fileIn;
    } else {
        CompressionCodec codec = new CompressionCodecFactory(job).getCodec(file);
        if (null != codec) {
            isCompressedInput = true;
            decompressor = CodecPool.getDecompressor(codec);

            if (codec instanceof SplittableCompressionCodec) {
                final SplitCompressionInputStream cIn = ((SplittableCompressionCodec) codec).createInputStream(
                        fileIn, decompressor, start, end, SplittableCompressionCodec.READ_MODE.BYBLOCK);
                in = new CompressedSplitLineReader(cIn, job, this.recordDelimiterBytes);
                start = cIn.getAdjustedStart();
                end = cIn.getAdjustedEnd();
                filePosition = cIn;
            } else {
                in = new SplitLineReader(codec.createInputStream(fileIn, decompressor), job,
                        this.recordDelimiterBytes);
                filePosition = fileIn;
            }
        } else {
            fileIn.seek(start);
            in = new SplitLineReader(fileIn, job, this.recordDelimiterBytes);
            filePosition = fileIn;
        }
    }

    /*
     * If this is not the first split, we always throw away the first record
     * because we always (except for the last split) read one extra line in
     * the next() method.
     */
    if (start != 0) {
        start += in.readLine(new Text(), 0, maxBytesToConsume(start));
    }
    this.pos = start;
}
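This initialize() method expects a serialized Legion objective under the literal configuration key "legion_objective". A hedged driver-side sketch; the placeholder string stands in for whatever serialized form the project's ObjectiveDeserializer expects:

// Driver-side sketch: placing the serialized objective where initialize() will find it.
String serializedObjective = "{...}"; // illustrative placeholder for the serialized objective
Configuration conf = new Configuration();
conf.set("legion_objective", serializedObjective); // read back via context.getConfiguration()
Job job = Job.getInstance(conf, "legion-ingest");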