List of usage examples for org.apache.hadoop.mapreduce.TaskAttemptContext.getConfiguration()
public Configuration getConfiguration();
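The method returns the job Configuration as seen by the running task attempt. The recurring pattern in the examples below is to resolve a FileSystem from a Path against that configuration, or to read job settings inside a RecordReader or OutputCommitter. A minimal sketch of the pattern (the class name and the "example.verbose" key are hypothetical, for illustration only):

    import java.io.IOException;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.mapreduce.InputSplit;
    import org.apache.hadoop.mapreduce.TaskAttemptContext;
    import org.apache.hadoop.mapreduce.lib.input.FileSplit;

    // Hypothetical reader fragment: reads a flag from the job configuration
    // and resolves the split's file system against that configuration.
    public class GetConfigurationSketch {
        void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
            Configuration conf = context.getConfiguration();              // job configuration for this attempt
            boolean verbose = conf.getBoolean("example.verbose", false);  // hypothetical key
            Path path = ((FileSplit) genericSplit).getPath();
            FileSystem fs = path.getFileSystem(conf);                     // FS resolved from the task's configuration
            if (verbose) {
                System.out.println("Opening " + path + " via " + fs.getUri());
            }
        }
    }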
From source file: com.netflix.bdp.s3.S3MultipartOutputCommitter.java
License: Apache License
@Override
public boolean needsTaskCommit(TaskAttemptContext context) throws IOException {
    // check for files on the local FS in the attempt path
    Path attemptPath = getTaskAttemptPath(context);
    FileSystem fs = attemptPath.getFileSystem(context.getConfiguration());
    if (fs.exists(attemptPath)) {
        FileStatus[] stats = fs.listStatus(attemptPath);
        return stats.length > 0;
    }
    return false;
}
From source file: com.netflix.bdp.s3.S3MultipartOutputCommitter.java
License: Apache License
protected void commitTaskInternal(final TaskAttemptContext context, Iterable<FileStatus> taskOutput)
        throws IOException {
    Configuration conf = context.getConfiguration();
    final AmazonS3 client = getClient(getOutputPath(context), conf);
    final Path attemptPath = getTaskAttemptPath(context);
    FileSystem attemptFS = attemptPath.getFileSystem(conf);

    // add the commits file to the wrapped committer's task attempt location.
    // this complete file will be committed by the wrapped committer at the end
    // of this method.
    Path commitsAttemptPath = wrappedCommitter.getTaskAttemptPath(context);
    FileSystem commitsFS = commitsAttemptPath.getFileSystem(conf);

    // keep track of unfinished commits in case one fails. if something fails,
    // we will try to abort the ones that had already succeeded.
    final List<S3Util.PendingUpload> commits = Lists.newArrayList();

    boolean threw = true;
    ObjectOutputStream completeUploadRequests = new ObjectOutputStream(
            commitsFS.create(commitsAttemptPath, false));
    try {
        Tasks.foreach(taskOutput).stopOnFailure().throwFailureWhenFinished().executeWith(threadPool)
                .run(new Task<FileStatus, IOException>() {
                    @Override
                    public void run(FileStatus stat) throws IOException {
                        File localFile = new File(URI.create(stat.getPath().toString()).getPath());
                        if (localFile.length() <= 0) {
                            return;
                        }
                        String relative = Paths.getRelativePath(attemptPath, stat.getPath());
                        String partition = getPartition(relative);
                        String key = getFinalKey(relative, context);
                        S3Util.PendingUpload commit = S3Util.multipartUpload(client, localFile, partition,
                                getBucket(context), key, uploadPartSize);
                        commits.add(commit);
                    }
                });

        for (S3Util.PendingUpload commit : commits) {
            completeUploadRequests.writeObject(commit);
        }
        threw = false;
    } finally {
        if (threw) {
            Tasks.foreach(commits).run(new Task<S3Util.PendingUpload, RuntimeException>() {
                @Override
                public void run(S3Util.PendingUpload commit) {
                    S3Util.abortCommit(client, commit);
                }
            });
            try {
                attemptFS.delete(attemptPath, true);
            } catch (Exception e) {
                LOG.error("Failed while cleaning up failed task commit: ", e);
            }
        }
        Closeables.close(completeUploadRequests, threw);
    }

    wrappedCommitter.commitTask(context);
    attemptFS.delete(attemptPath, true);
}
From source file: com.netflix.bdp.s3.S3MultipartOutputCommitter.java
License: Apache License
@Override
public void abortTask(TaskAttemptContext context) throws IOException {
    // the API specifies that the task has not yet been committed, so there are
    // no uploads that need to be cancelled. just delete files on the local FS.
    Path attemptPath = getTaskAttemptPath(context);
    FileSystem fs = attemptPath.getFileSystem(context.getConfiguration());
    if (!fs.delete(attemptPath, true)) {
        LOG.error("Failed to delete task attempt data: " + attemptPath);
    }
    wrappedCommitter.abortTask(context);
}
From source file: com.netflix.bdp.s3.S3MultipartOutputCommitter.java
License: Apache License
private static Path taskAttemptPath(TaskAttemptContext context, String uuid) {
    return getTaskAttemptPath(context, Paths.getLocalTaskAttemptTempDir(
            context.getConfiguration(), uuid, getTaskId(context), getAttemptId(context)));
}
From source file: com.netflix.bdp.s3.S3PartitionedOutputCommitter.java
License: Apache License
@Override
protected List<FileStatus> getTaskOutput(TaskAttemptContext context) throws IOException {
    PathFilter filter = HiddenPathFilter.get();

    // get files on the local FS in the attempt path
    Path attemptPath = getTaskAttemptPath(context);
    FileSystem attemptFS = attemptPath.getFileSystem(context.getConfiguration());
    RemoteIterator<LocatedFileStatus> iter = attemptFS.listFiles(attemptPath, true /* recursive */);

    List<FileStatus> stats = Lists.newArrayList();
    while (iter.hasNext()) {
        FileStatus stat = iter.next();
        if (filter.accept(stat.getPath())) {
            stats.add(stat);
        }
    }

    return stats;
}
From source file: com.netflix.bdp.s3.S3PartitionedOutputCommitter.java
License: Apache License
@Override
public void commitTask(TaskAttemptContext context) throws IOException {
    // these checks run before any files are uploaded to S3, so it is okay for
    // this to throw failures.
    List<FileStatus> taskOutput = getTaskOutput(context);
    Path attemptPath = getTaskAttemptPath(context);
    Configuration conf = context.getConfiguration();
    FileSystem attemptFS = attemptPath.getFileSystem(conf);
    Set<String> partitions = getPartitions(attemptFS, attemptPath, taskOutput);

    // enforce conflict resolution, but only if the mode is FAIL. for APPEND,
    // it doesn't matter that the partitions are already there, and for REPLACE,
    // deletion should be done during task commit.
    if (getMode(context) == ConflictResolution.FAIL) {
        FileSystem s3 = getOutputPath(context).getFileSystem(context.getConfiguration());
        for (String partition : partitions) {
            // getFinalPath adds the UUID to the file name. this needs the parent.
            Path partitionPath = getFinalPath(partition + "/file", context).getParent();
            if (s3.exists(partitionPath)) {
                throw new AlreadyExistsException(
                        "Output partition " + partition + " already exists: " + partitionPath);
            }
        }
    }

    commitTaskInternal(context, taskOutput);
}
From source file: com.neusoft.hbase.test.hadoop.dataload.HFileOutputFormat2.java
License: Apache License
static <V extends Cell> RecordWriter<ImmutableBytesWritable, V> createRecordWriter(
        final TaskAttemptContext context) throws IOException, InterruptedException {
    // Get the path of the temporary output file
    final Path outputPath = FileOutputFormat.getOutputPath(context);
    final Path outputdir = new FileOutputCommitter(outputPath, context).getWorkPath();
    final Configuration conf = context.getConfiguration();
    final FileSystem fs = outputdir.getFileSystem(conf);
    // These configs. are from hbase-*.xml
    final long maxsize = conf.getLong(HConstants.HREGION_MAX_FILESIZE, HConstants.DEFAULT_MAX_FILE_SIZE);
    // Invented config. Add to hbase-*.xml if other than default compression.
    final String defaultCompression = conf.get("hfile.compression", Compression.Algorithm.NONE.getName());
    final boolean compactionExclude = conf.getBoolean("hbase.mapreduce.hfileoutputformat.compaction.exclude",
            false);

    // create a map from column family to the compression algorithm
    final Map<byte[], String> compressionMap = createFamilyCompressionMap(conf);
    final Map<byte[], String> bloomTypeMap = createFamilyBloomMap(conf);
    final Map<byte[], String> blockSizeMap = createFamilyBlockSizeMap(conf);

    String dataBlockEncodingStr = conf.get(DATABLOCK_ENCODING_CONF_KEY);
    final HFileDataBlockEncoder encoder;
    if (dataBlockEncodingStr == null) {
        encoder = NoOpDataBlockEncoder.INSTANCE;
    } else {
        try {
            encoder = new HFileDataBlockEncoderImpl(DataBlockEncoding.valueOf(dataBlockEncodingStr));
        } catch (IllegalArgumentException ex) {
            throw new RuntimeException("Invalid data block encoding type configured for the param "
                    + DATABLOCK_ENCODING_CONF_KEY + " : " + dataBlockEncodingStr);
        }
    }

    return new RecordWriter<ImmutableBytesWritable, V>() {
        // Map of families to writers and how much has been output on the writer.
        private final Map<byte[], WriterLength> writers = new TreeMap<byte[], WriterLength>(
                Bytes.BYTES_COMPARATOR);
        private byte[] previousRow = HConstants.EMPTY_BYTE_ARRAY;
        private final byte[] now = Bytes.toBytes(System.currentTimeMillis());
        private boolean rollRequested = false;

        public void write(ImmutableBytesWritable row, V cell) throws IOException {
            KeyValue kv = KeyValueUtil.ensureKeyValue(cell);

            // null input == user explicitly wants to flush
            if (row == null && kv == null) {
                rollWriters();
                return;
            }

            byte[] rowKey = kv.getRow();
            long length = kv.getLength();
            byte[] family = kv.getFamily();
            WriterLength wl = this.writers.get(family);

            // If this is a new column family, verify that the directory exists
            if (wl == null) {
                fs.mkdirs(new Path(outputdir, Bytes.toString(family)));
            }

            // If any of the HFiles for the column families has reached
            // maxsize, we need to roll all the writers
            if (wl != null && wl.written + length >= maxsize) {
                this.rollRequested = true;
            }

            // This can only happen once a row is finished though
            if (rollRequested && Bytes.compareTo(this.previousRow, rowKey) != 0) {
                rollWriters();
            }

            // create a new HLog writer, if necessary
            if (wl == null || wl.writer == null) {
                wl = getNewWriter(family, conf);
            }

            // we now have the proper HLog writer. full steam ahead
            kv.updateLatestStamp(this.now);
            wl.writer.append(kv);
            wl.written += length;

            // Copy the row so we know when a row transition.
            this.previousRow = rowKey;
        }

        private void rollWriters() throws IOException {
            for (WriterLength wl : this.writers.values()) {
                if (wl.writer != null) {
                    LOG.info("Writer=" + wl.writer.getPath()
                            + ((wl.written == 0) ? "" : ", wrote=" + wl.written));
                    close(wl.writer);
                }
                wl.writer = null;
                wl.written = 0;
            }
            this.rollRequested = false;
        }

        /*
         * Create a new StoreFile.Writer.
         * @param family
         * @return A WriterLength, containing a new StoreFile.Writer.
         * @throws IOException
         */
        private WriterLength getNewWriter(byte[] family, Configuration conf) throws IOException {
            WriterLength wl = new WriterLength();
            Path familydir = new Path(outputdir, Bytes.toString(family));
            String compression = compressionMap.get(family);
            compression = compression == null ? defaultCompression : compression;
            String bloomTypeStr = bloomTypeMap.get(family);
            BloomType bloomType = BloomType.NONE;
            if (bloomTypeStr != null) {
                bloomType = BloomType.valueOf(bloomTypeStr);
            }
            String blockSizeString = blockSizeMap.get(family);
            int blockSize = blockSizeString == null ? HConstants.DEFAULT_BLOCKSIZE
                    : Integer.parseInt(blockSizeString);
            Configuration tempConf = new Configuration(conf);
            tempConf.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0.0f);
            wl.writer = new StoreFile.WriterBuilder(conf, new CacheConfig(tempConf), fs)
                    .withOutputDir(familydir).withBloomType(bloomType).withComparator(KeyValue.COMPARATOR)
                    .build();
            this.writers.put(family, wl);
            return wl;
        }

        private void close(final StoreFile.Writer w) throws IOException {
            if (w != null) {
                w.appendFileInfo(StoreFile.BULKLOAD_TIME_KEY, Bytes.toBytes(System.currentTimeMillis()));
                w.appendFileInfo(StoreFile.BULKLOAD_TASK_KEY,
                        Bytes.toBytes(context.getTaskAttemptID().toString()));
                w.appendFileInfo(StoreFile.MAJOR_COMPACTION_KEY, Bytes.toBytes(true));
                w.appendFileInfo(StoreFile.EXCLUDE_FROM_MINOR_COMPACTION_KEY,
                        Bytes.toBytes(compactionExclude));
                w.appendTrackedTimestampsToMetadata();
                w.close();
            }
        }

        public void close(TaskAttemptContext c) throws IOException, InterruptedException {
            for (WriterLength wl : this.writers.values()) {
                close(wl.writer);
            }
        }
    };
}
From source file: com.neusoft.hbase.test.hadoop.dataload.HFileOutputFormatBase.java
License: Apache License
// getRecordWriter() replaces the generic
// static <V extends Cell> RecordWriter<ImmutableBytesWritable, V> createRecordWriter(),
// with the cell type fixed to KeyValue
public RecordWriter<ImmutableBytesWritable, KeyValue> getRecordWriter(final TaskAttemptContext context)
        throws IOException, InterruptedException {
    // Get the path of the temporary output file
    final Path outputPath = FileOutputFormat.getOutputPath(context);
    final Path outputdir = new FileOutputCommitter(outputPath, context).getWorkPath();
    final Path ignoreOutputPath = getDeleteRowKeyFile(outputPath); // side file for deleted row keys
    final Configuration conf = context.getConfiguration();
    final FileSystem fs = outputdir.getFileSystem(conf);
    // These configs. are from hbase-*.xml
    final long maxsize = conf.getLong(HConstants.HREGION_MAX_FILESIZE, HConstants.DEFAULT_MAX_FILE_SIZE);
    // Invented config. Add to hbase-*.xml if other than default compression.
    final String defaultCompression = conf.get("hfile.compression", Compression.Algorithm.NONE.getName());
    final boolean compactionExclude = conf.getBoolean("hbase.mapreduce.hfileoutputformat.compaction.exclude",
            false);

    // create a map from column family to the compression algorithm
    final Map<byte[], String> compressionMap = createFamilyCompressionMap(conf);
    final Map<byte[], String> bloomTypeMap = createFamilyBloomMap(conf);
    final Map<byte[], String> blockSizeMap = createFamilyBlockSizeMap(conf);

    String dataBlockEncodingStr = conf.get(DATABLOCK_ENCODING_CONF_KEY);
    final HFileDataBlockEncoder encoder;
    if (dataBlockEncodingStr == null) {
        encoder = NoOpDataBlockEncoder.INSTANCE;
    } else {
        try {
            encoder = new HFileDataBlockEncoderImpl(DataBlockEncoding.valueOf(dataBlockEncodingStr));
        } catch (IllegalArgumentException ex) {
            throw new RuntimeException("Invalid data block encoding type configured for the param "
                    + DATABLOCK_ENCODING_CONF_KEY + " : " + dataBlockEncodingStr);
        }
    }

    return new RecordWriter<ImmutableBytesWritable, KeyValue>() { // V -> KeyValue
        // Map of families to writers and how much has been output on the writer.
        private final Map<byte[], WriterLength> writers = new TreeMap<byte[], WriterLength>(
                Bytes.BYTES_COMPARATOR);
        private final FSDataOutputStream dos = fs.create(ignoreOutputPath);
        private byte[] previousRow = HConstants.EMPTY_BYTE_ARRAY;
        private final byte[] now = Bytes.toBytes(System.currentTimeMillis());
        private boolean rollRequested = false;

        public void write(ImmutableBytesWritable row, KeyValue kv) // was: V cell
                throws IOException {
            // KeyValue kv = KeyValueUtil.ensureKeyValue(cell);

            // null input == user explicitly wants to flush
            if (row == null && kv == null) {
                rollWriters();
                return;
            }

            byte[] rowKey = kv.getRow();
            long length = kv.getLength();
            byte[] family = kv.getFamily();

            // row keys matched by ignore() are recorded in the side file
            // instead of being written to an HFile
            if (ignore(kv)) {
                byte[] readBuf = rowKey;
                dos.write(readBuf, 0, readBuf.length);
                dos.write(Bytes.toBytes("\n"));
                return;
            }

            WriterLength wl = this.writers.get(family);

            // If this is a new column family, verify that the directory exists
            if (wl == null) {
                fs.mkdirs(new Path(outputdir, Bytes.toString(family)));
            }

            // If any of the HFiles for the column families has reached
            // maxsize, we need to roll all the writers
            if (wl != null && wl.written + length >= maxsize) {
                this.rollRequested = true;
            }

            // This can only happen once a row is finished though
            if (rollRequested && Bytes.compareTo(this.previousRow, rowKey) != 0) {
                rollWriters();
            }

            // create a new HLog writer, if necessary
            if (wl == null || wl.writer == null) {
                wl = getNewWriter(family, conf);
            }

            // we now have the proper HLog writer. full steam ahead
            kv.updateLatestStamp(this.now);
            wl.writer.append(kv);
            wl.written += length;

            // Copy the row so we know when a row transition.
            this.previousRow = rowKey;
        }

        private void rollWriters() throws IOException {
            for (WriterLength wl : this.writers.values()) {
                if (wl.writer != null) {
                    LOG.info("Writer=" + wl.writer.getPath()
                            + ((wl.written == 0) ? "" : ", wrote=" + wl.written));
                    close(wl.writer);
                }
                wl.writer = null;
                wl.written = 0;
            }
            this.rollRequested = false;
        }

        /*
         * Create a new StoreFile.Writer.
         * @param family
         * @return A WriterLength, containing a new StoreFile.Writer.
         * @throws IOException
         */
        private WriterLength getNewWriter(byte[] family, Configuration conf) throws IOException {
            WriterLength wl = new WriterLength();
            Path familydir = new Path(outputdir, Bytes.toString(family));
            String compression = compressionMap.get(family);
            compression = compression == null ? defaultCompression : compression;
            String bloomTypeStr = bloomTypeMap.get(family);
            BloomType bloomType = BloomType.NONE;
            if (bloomTypeStr != null) {
                bloomType = BloomType.valueOf(bloomTypeStr);
            }
            String blockSizeString = blockSizeMap.get(family);
            int blockSize = blockSizeString == null ? HConstants.DEFAULT_BLOCKSIZE
                    : Integer.parseInt(blockSizeString);
            Configuration tempConf = new Configuration(conf);
            tempConf.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0.0f);
            wl.writer = new StoreFile.WriterBuilder(conf, new CacheConfig(tempConf), fs)
                    .withOutputDir(familydir).withBloomType(bloomType).withComparator(KeyValue.COMPARATOR)
                    .build();
            this.writers.put(family, wl);
            return wl;
        }

        private void close(final StoreFile.Writer w) throws IOException {
            if (w != null) {
                w.appendFileInfo(StoreFile.BULKLOAD_TIME_KEY, Bytes.toBytes(System.currentTimeMillis()));
                w.appendFileInfo(StoreFile.BULKLOAD_TASK_KEY,
                        Bytes.toBytes(context.getTaskAttemptID().toString()));
                w.appendFileInfo(StoreFile.MAJOR_COMPACTION_KEY, Bytes.toBytes(true));
                w.appendFileInfo(StoreFile.EXCLUDE_FROM_MINOR_COMPACTION_KEY,
                        Bytes.toBytes(compactionExclude));
                w.appendTrackedTimestampsToMetadata();
                w.close();
            }
        }

        public void close(TaskAttemptContext c) throws IOException, InterruptedException {
            dos.flush();
            dos.close();
            for (WriterLength wl : this.writers.values()) {
                close(wl.writer);
            }
        }
    };
}
From source file: com.nikoo28.excel.mapreduce.ExcelRecordReader.java
License: Apache License
@Override
public void initialize(InputSplit genericSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {
    FileSplit split = (FileSplit) genericSplit;
    Configuration job = context.getConfiguration();
    final Path file = split.getPath();

    FileSystem fs = file.getFileSystem(job);
    FSDataInputStream fileIn = fs.open(split.getPath());
    is = fileIn;

    String line = new ExcelParser().parseExcelData(is);
    this.strArrayofLines = line.split("\n");
}
From source file: com.ning.metrics.serialization.hadoop.SmileRecordReader.java
License: Apache License
/**
 * Called once at initialization.
 *
 * @param genericSplit the split that defines the range of records to read
 * @param context the information about the task
 * @throws java.io.IOException
 * @throws InterruptedException
 */
@Override
public void initialize(InputSplit genericSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {
    FileSplit split = (FileSplit) genericSplit;
    Configuration job = context.getConfiguration();

    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();

    // Open the file and seek to the start of the split
    FileSystem fs = file.getFileSystem(job);
    fileIn = fs.open(split.getPath());
    if (start != 0) {
        --start;
        fileIn.seek(start);
    }
    this.pos = start;

    deserializer = new SmileEnvelopeEventDeserializer(fileIn, false);
}
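All of the examples above read settings that were placed in the job Configuration on the client side before submission. A minimal driver-side sketch of that other half of the contract, assuming the same hypothetical "example.verbose" key and an arbitrary job name:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.mapreduce.Job;

    public class DriverSketch {
        public static void main(String[] args) throws Exception {
            Configuration conf = new Configuration();
            // hypothetical key: tasks later read it via context.getConfiguration()
            conf.setBoolean("example.verbose", true);
            Job job = Job.getInstance(conf, "example-job"); // hypothetical job name
            // ... configure input/output formats, paths, and mapper/reducer classes,
            // then submit with job.waitForCompletion(true)
        }
    }

Note that Job.getInstance copies the passed Configuration, so keys must be set before the Job is created (or via job.getConfiguration() afterwards) to be visible to tasks.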