Usage examples for org.apache.hadoop.mapreduce.TaskAttemptContext#getConfiguration()
public Configuration getConfiguration();
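A typical call site is a Mapper's setup() method, where getConfiguration() exposes job-level settings before any records are processed. A minimal sketch, assuming a hypothetical property name my.custom.threshold:

    import java.io.IOException;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Mapper;

    public class ThresholdMapper extends Mapper<LongWritable, Text, Text, LongWritable> {
        private int threshold;

        @Override
        protected void setup(Context context) throws IOException, InterruptedException {
            // Mapper.Context extends TaskAttemptContext, so getConfiguration() is available here
            Configuration conf = context.getConfiguration();
            threshold = conf.getInt("my.custom.threshold", 10); // hypothetical key with a default
        }

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            if (value.getLength() > threshold) {
                context.write(new Text("long-line"), new LongWritable(1));
            }
        }
    }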
From source file: com.dinglicom.clouder.mapreduce.input.LineRecordReader.java
License: Apache License

public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
    FileSplit split = (FileSplit) genericSplit;
    System.out.println("-------------------length:" + split.getLength() + "\tposition:" + split.getStart());
    Configuration job = context.getConfiguration();
    this.maxLineLength = job.getInt(MAX_LINE_LENGTH, Integer.MAX_VALUE);
    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();
    key = new Text(FileToCDRType.getTypeByPath(file.getName()));
    compressionCodecs = new CompressionCodecFactory(job);
    codec = compressionCodecs.getCodec(file);

    // open the file and seek to the start of the split
    final FileSystem fs = file.getFileSystem(job);
    fileIn = fs.open(file);
    if (isCompressedInput()) {
        decompressor = CodecPool.getDecompressor(codec);
        if (codec instanceof SplittableCompressionCodec) {
            final SplitCompressionInputStream cIn = ((SplittableCompressionCodec) codec).createInputStream(
                    fileIn, decompressor, start, end, SplittableCompressionCodec.READ_MODE.BYBLOCK);
            if (null == this.recordDelimiterBytes) {
                in = new LineReader(cIn, job);
            } else {
                in = new LineReader(cIn, job, this.recordDelimiterBytes);
            }
            start = cIn.getAdjustedStart();
            end = cIn.getAdjustedEnd();
            filePosition = cIn;
        } else {
            if (null == this.recordDelimiterBytes) {
                in = new LineReader(codec.createInputStream(fileIn, decompressor), job);
            } else {
                in = new LineReader(codec.createInputStream(fileIn, decompressor), job,
                        this.recordDelimiterBytes);
            }
            filePosition = fileIn;
        }
    } else {
        fileIn.seek(start);
        if (null == this.recordDelimiterBytes) {
            in = new LineReader(fileIn, job);
        } else {
            in = new LineReader(fileIn, job, this.recordDelimiterBytes);
        }
        filePosition = fileIn;
    }
    // If this is not the first split, we always throw away the first record,
    // because we always (except for the last split) read one extra line in
    // the next() method.
    if (start != 0) {
        start += in.readLine(new Text(), 0, maxBytesToConsume(start));
    }
    this.pos = start;
}
From source file: com.dinglicom.clouder.mapreduce.input.TextInputFormat.java
License: Apache License

@Override
public RecordReader<Text, Text> createRecordReader(InputSplit split, TaskAttemptContext context) {
    String delimiter = context.getConfiguration().get("textinputformat.record.delimiter");
    byte[] recordDelimiterBytes = null;
    if (null != delimiter) {
        recordDelimiterBytes = delimiter.getBytes(Charsets.UTF_8);
    }
    return new LineRecordReader(recordDelimiterBytes);
}
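The delimiter read above comes from the standard Hadoop property textinputformat.record.delimiter, which the job driver can set before submission. A minimal driver sketch; the job name and the blank-line delimiter are illustrative choices:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.mapreduce.Job;

    public class DelimiterJobDriver {
        public static void main(String[] args) throws Exception {
            Configuration conf = new Configuration();
            // Split records on blank lines rather than single newlines
            conf.set("textinputformat.record.delimiter", "\n\n");
            Job job = Job.getInstance(conf, "custom-delimiter-job");
            // ... configure input/output formats, paths, and mapper/reducer classes, then submit
            job.waitForCompletion(true);
        }
    }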
From source file: com.edwardsit.spark4n6.EWFRecordReader.java
License: Apache License

@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
    FileSplit fileSplit = (FileSplit) split;
    Configuration conf = context.getConfiguration();
    file = fileSplit.getPath();
    start = fileSplit.getStart();
    end = start + fileSplit.getLength();
    fs = file.getFileSystem(conf);
    stream = new EWFFileReader(fs, getFirstFile());
    chunkSize = new EWFSegmentFileReader(fs).DEFAULT_CHUNK_SIZE;
    log.setLevel(Level.DEBUG);
}
From source file: com.ery.hadoop.mrddx.hbase.HbaseOutputFormat.java
License: Apache License

@Override
public RecordWriter<K, NullWritable> getRecordWriter(TaskAttemptContext context)
        throws IOException, InterruptedException {
    // Legacy mapred signature this replaces:
    // public RecordWriter<K, NullWritable> getRecordWriter(FileSystem ignored, JobConf job,
    //         String name, Progressable progress) throws IOException
    HbaseConfiguration dbConf = new HbaseConfiguration(context.getConfiguration(),
            HbaseConfiguration.FLAG_HBASE_OUTPUT);
    HTable table = new HTable(dbConf.getConf(), dbConf.getOutputHBaseTableName());
    table.setAutoFlush(false, true);
    table.setWriteBufferSize(dbConf.getOutputHBaseWriteBuffersize());
    return new HbaseRecordWriter<K, NullWritable>(dbConf, table);
}
From source file: com.ery.hadoop.mrddx.hFile.LineRecordReader.java
License: Apache License

public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
    FileSplit split = (FileSplit) genericSplit;
    Configuration job = context.getConfiguration();
    this.maxLineLength = job.getInt("mapred.linerecordreader.maxlength", Integer.MAX_VALUE);
    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();
    compressionCodecs = new CompressionCodecFactory(job);
    codec = compressionCodecs.getCodec(file);

    // open the file and seek to the start of the split
    FileSystem fs = file.getFileSystem(job);
    FSDataInputStream fileIn = fs.open(split.getPath());
    if (isCompressedInput()) {
        decompressor = CodecPool.getDecompressor(codec);
        if (codec instanceof SplittableCompressionCodec) {
            final SplitCompressionInputStream cIn = ((SplittableCompressionCodec) codec).createInputStream(
                    fileIn, decompressor, start, end, SplittableCompressionCodec.READ_MODE.BYBLOCK);
            // .tar.gz files are wrapped in a TarInputStream, e.g.
            // new TarInputStream(codec.createInputStream(fileIn, decompressor))
            String filename = file.getName();
            if (filename.endsWith(".tar.gz")) {
                in = new LineReader(new TarInputStream(cIn), job);
            } else {
                in = new LineReader(cIn, job);
            }
            start = cIn.getAdjustedStart();
            end = cIn.getAdjustedEnd();
            filePosition = cIn;
        } else {
            String filename = file.getName();
            if (filename.endsWith(".tar.gz")) {
                in = new LineReader(new TarInputStream(codec.createInputStream(fileIn, decompressor)), job);
            } else {
                in = new LineReader(codec.createInputStream(fileIn, decompressor), job);
            }
            filePosition = fileIn;
        }
    } else {
        fileIn.seek(start);
        in = new LineReader(fileIn, job);
        filePosition = fileIn;
    }
    // If this is not the first split, we always throw away the first record,
    // because we always (except for the last split) read one extra line in
    // the next() method.
    if (start != 0) {
        start += in.readLine(new Text(), 0, maxBytesToConsume(start));
    }
    this.pos = start;
}
From source file: com.explorys.apothecary.hbase.mr.inputformat.MergedStoreFileRecordReader.java
License: Apache License

/**
 * In initialize we check the number of files in the table directory.
 * This is an invariant that will cause us to fail the job if it changes, because when it
 * changes it is likely that the number of regions has changed as a result of a split.
 *
 * We also initialize a scanner that handles merging KeyValues from storefiles
 * (or the Memstore, although that is not currently accessible via the API).
 */
@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
    scanner = new CachedPeekHFilesScanner(region, scan);
    tableDir = region.getTableDir();
    fs = FileSystem.get(context.getConfiguration());

    int numFiles = HRegionUtil.lsRecursive(fs, tableDir).size();
    int numFilesConf = context.getConfiguration().getInt(HFileResultInputFormat.NUMBER_TABLE_FILES, 0);

    if (numFiles != numFilesConf) {
        throw new IllegalStateException(
                "Number of files has shifted underneath the job. Possible region split?");
    }
    numberTableFiles = numFiles;
    scanner.initialize();
}
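For the invariant check above to pass, the submitting side has to record the file count under HFileResultInputFormat.NUMBER_TABLE_FILES before the job runs. A hypothetical driver-side counterpart; countFiles is a stand-in for HRegionUtil.lsRecursive(fs, dir).size() from this project, and the literal key string is an assumption for illustration:

    import java.io.IOException;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileStatus;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.mapreduce.Job;

    public class TableFileCountSetup {
        // Stand-in for HRegionUtil.lsRecursive(fs, dir).size()
        static int countFiles(FileSystem fs, Path dir) throws IOException {
            int count = 0;
            for (FileStatus status : fs.listStatus(dir)) {
                count += status.isDirectory() ? countFiles(fs, status.getPath()) : 1;
            }
            return count;
        }

        public static void configure(Job job, Path tableDir) throws IOException {
            Configuration conf = job.getConfiguration();
            FileSystem fs = tableDir.getFileSystem(conf);
            // The reader compares this value against a fresh listing at task start;
            // the key string below is a hypothetical stand-in for
            // HFileResultInputFormat.NUMBER_TABLE_FILES
            conf.setInt("hfile.result.input.format.number.table.files", countFiles(fs, tableDir));
        }
    }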
From source file: com.facebook.hiveio.common.HadoopUtils.java
License: Apache License

/**
 * Set worker output directory
 * @param context Task context
 * @throws IOException I/O errors
 */
public static void setWorkOutputDir(TaskAttemptContext context) throws IOException {
    Configuration conf = context.getConfiguration();
    String outputPath = getOutputDir(conf);
    // we need to do this to get the task path and set it for the mapred
    // implementation, since it can't be done automatically because of
    // the mapreduce->mapred abstraction
    if (outputPath != null) {
        FileOutputCommitter foc = new FileOutputCommitter(getOutputPath(conf), context);
        Path path = foc.getWorkPath();
        FileSystem fs = path.getFileSystem(conf);
        fs.mkdirs(path);
        conf.set("mapred.work.output.dir", path.toString());
        LOG.info("Setting mapred.work.output.dir to {}", path.toString());
    }
}
From source file: com.facebook.hiveio.input.HiveApiInputFormat.java
License: Apache License

@Override
public RecordReaderImpl createRecordReader(InputSplit inputSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {
    Configuration conf = context.getConfiguration();
    JobConf jobConf = new JobConf(conf);

    HInputSplit split = (HInputSplit) inputSplit;
    split.setConf(jobConf);

    int[] columnIds = split.getColumnIds();
    HiveUtils.setReadColumnIds(jobConf, columnIds);

    // CHECKSTYLE: stop LineLength
    org.apache.hadoop.mapred.RecordReader<WritableComparable, Writable> baseRecordReader =
            split.getBaseRecordReader(jobConf, context);
    // CHECKSTYLE: resume LineLength

    RecordParser<Writable> recordParser = getParser(baseRecordReader.createValue(), split, columnIds, conf);

    RecordReaderImpl reader = new RecordReaderImpl(baseRecordReader, recordParser);
    reader.setObserver(observer);
    return reader;
}
From source file: com.facebook.hiveio.output.HiveApiOutputFormat.java
License: Apache License

@Override
public RecordWriterImpl getRecordWriter(TaskAttemptContext taskAttemptContext)
        throws IOException, InterruptedException {
    HadoopUtils.setWorkOutputDir(taskAttemptContext);

    Configuration conf = taskAttemptContext.getConfiguration();
    OutputConf outputConf = new OutputConf(conf, myProfileId);
    OutputInfo oti = outputConf.readOutputTableInfo();

    HiveUtils.setRCileNumColumns(conf, oti.getColumnInfo().size());
    HadoopUtils.setOutputKeyWritableClass(conf, NullWritable.class);

    Serializer serializer = oti.createSerializer(conf);
    HadoopUtils.setOutputValueWritableClass(conf, serializer.getSerializedClass());

    org.apache.hadoop.mapred.OutputFormat baseOutputFormat =
            ReflectionUtils.newInstance(oti.getOutputFormatClass(), conf);
    // CHECKSTYLE: stop LineLength
    org.apache.hadoop.mapred.RecordWriter<WritableComparable, Writable> baseWriter =
            getBaseRecordWriter(taskAttemptContext, baseOutputFormat);
    // CHECKSTYLE: resume LineLength

    StructObjectInspector soi = Inspectors.createFor(oti.getColumnInfo());

    if (!outputConf.shouldResetSlowWrites()) {
        return new RecordWriterImpl(baseWriter, serializer, soi);
    } else {
        long writeTimeout = outputConf.getWriteResetTimeout();
        return new ResettableRecordWriterImpl(baseWriter, serializer, soi, taskAttemptContext,
                baseOutputFormat, writeTimeout);
    }
}
From source file: com.facebook.hiveio.output.HiveApiOutputFormat.java
License: Apache License

/**
 * Get the base Hadoop RecordWriter.
 * @param taskAttemptContext TaskAttemptContext
 * @param baseOutputFormat Hadoop OutputFormat
 * @return RecordWriter
 * @throws IOException Hadoop issues
 */
// CHECKSTYLE: stop LineLengthCheck
protected static org.apache.hadoop.mapred.RecordWriter<WritableComparable, Writable> getBaseRecordWriter(
        TaskAttemptContext taskAttemptContext, org.apache.hadoop.mapred.OutputFormat baseOutputFormat)
        throws IOException {
    // CHECKSTYLE: resume LineLengthCheck
    HadoopUtils.setWorkOutputDir(taskAttemptContext);
    JobConf jobConf = new JobConf(taskAttemptContext.getConfiguration());
    int fileId = CREATED_FILES_COUNTER.incrementAndGet();
    String name = FileOutputFormat.getUniqueName(jobConf, "part-" + fileId);
    Reporter reporter = new ProgressReporter(taskAttemptContext);
    org.apache.hadoop.mapred.RecordWriter<WritableComparable, Writable> baseWriter =
            baseOutputFormat.getRecordWriter(null, jobConf, name, reporter);
    LOG.info("getBaseRecordWriter: Created new {} with file {}", baseWriter, name);
    return baseWriter;
}