Example usage for org.apache.hadoop.mapreduce TaskAttemptContext getConfiguration

Introduction

On this page you can find example usage for org.apache.hadoop.mapreduce TaskAttemptContext getConfiguration.

Prototype

public Configuration getConfiguration();

Document

Return the configuration for the job.
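
The method is typically called from task-side components (a RecordReader, RecordWriter, or OutputCommitter) to look up job settings and to resolve a FileSystem for the paths the task works with, which is the pattern repeated throughout the examples below. A minimal, self-contained sketch of that pattern (the property name my.custom.setting is made up for illustration):

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;

public abstract class ConfigAwareRecordReader<K, V> extends RecordReader<K, V> {

    @Override
    public void initialize(InputSplit genericSplit, TaskAttemptContext context)
            throws IOException, InterruptedException {
        // The task-side Configuration carries everything that was set on the Job at submit time.
        Configuration conf = context.getConfiguration();
        String custom = conf.get("my.custom.setting", "default-value");

        // The same Configuration is used to resolve the FileSystem behind the split's path.
        Path file = ((FileSplit) genericSplit).getPath();
        FileSystem fs = file.getFileSystem(conf);
        // ... open the stream with fs.open(file) and use 'custom' to drive the parsing.
    }
}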

Usage

From source file:com.netflix.bdp.s3.S3MultipartOutputCommitter.java

License:Apache License

@Override
public boolean needsTaskCommit(TaskAttemptContext context) throws IOException {
    // check for files on the local FS in the attempt path
    Path attemptPath = getTaskAttemptPath(context);
    FileSystem fs = attemptPath.getFileSystem(context.getConfiguration());

    if (fs.exists(attemptPath)) {
        FileStatus[] stats = fs.listStatus(attemptPath);
        return stats.length > 0;
    }

    return false;
}

From source file:com.netflix.bdp.s3.S3MultipartOutputCommitter.java

License:Apache License

protected void commitTaskInternal(final TaskAttemptContext context, Iterable<FileStatus> taskOutput)
        throws IOException {
    Configuration conf = context.getConfiguration();
    final AmazonS3 client = getClient(getOutputPath(context), conf);

    final Path attemptPath = getTaskAttemptPath(context);
    FileSystem attemptFS = attemptPath.getFileSystem(conf);

    // add the commits file to the wrapped committer's task attempt location.
    // this complete file will be committed by the wrapped committer at the end
    // of this method.
    Path commitsAttemptPath = wrappedCommitter.getTaskAttemptPath(context);
    FileSystem commitsFS = commitsAttemptPath.getFileSystem(conf);

    // keep track of unfinished commits in case one fails. if something fails,
    // we will try to abort the ones that had already succeeded.
    final List<S3Util.PendingUpload> commits = Lists.newArrayList();

    boolean threw = true;
    ObjectOutputStream completeUploadRequests = new ObjectOutputStream(
            commitsFS.create(commitsAttemptPath, false));
    try {
        Tasks.foreach(taskOutput).stopOnFailure().throwFailureWhenFinished().executeWith(threadPool)
                .run(new Task<FileStatus, IOException>() {
                    @Override
                    public void run(FileStatus stat) throws IOException {
                        File localFile = new File(URI.create(stat.getPath().toString()).getPath());
                        if (localFile.length() <= 0) {
                            return;
                        }
                        String relative = Paths.getRelativePath(attemptPath, stat.getPath());
                        String partition = getPartition(relative);
                        String key = getFinalKey(relative, context);
                        S3Util.PendingUpload commit = S3Util.multipartUpload(client, localFile, partition,
                                getBucket(context), key, uploadPartSize);
                        commits.add(commit);
                    }
                });

        for (S3Util.PendingUpload commit : commits) {
            completeUploadRequests.writeObject(commit);
        }

        threw = false;

    } finally {
        if (threw) {
            Tasks.foreach(commits).run(new Task<S3Util.PendingUpload, RuntimeException>() {
                @Override
                public void run(S3Util.PendingUpload commit) {
                    S3Util.abortCommit(client, commit);
                }
            });
            try {
                attemptFS.delete(attemptPath, true);
            } catch (Exception e) {
                LOG.error("Failed while cleaning up failed task commit: ", e);
            }
        }
        Closeables.close(completeUploadRequests, threw);
    }

    wrappedCommitter.commitTask(context);

    attemptFS.delete(attemptPath, true);
}
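
A design note on this method: the multipart uploads are only started here, while the serialized S3Util.PendingUpload records are written into the wrapped committer's task attempt path so the uploads can be completed later, outside this method. The finally block is what keeps a partially failed task from leaking uploads: every upload that had already been started is aborted and the local attempt directory is deleted before the exception propagates.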

From source file:com.netflix.bdp.s3.S3MultipartOutputCommitter.java

License:Apache License

@Override
public void abortTask(TaskAttemptContext context) throws IOException {
    // the API specifies that the task has not yet been committed, so there are
    // no uploads that need to be cancelled. just delete files on the local FS.
    Path attemptPath = getTaskAttemptPath(context);
    FileSystem fs = attemptPath.getFileSystem(context.getConfiguration());
    if (!fs.delete(attemptPath, true)) {
        LOG.error("Failed to delete task attempt data: " + attemptPath);
    }
    wrappedCommitter.abortTask(context);
}

From source file:com.netflix.bdp.s3.S3MultipartOutputCommitter.java

License:Apache License

private static Path taskAttemptPath(TaskAttemptContext context, String uuid) {
    return getTaskAttemptPath(context, Paths.getLocalTaskAttemptTempDir(context.getConfiguration(), uuid,
            getTaskId(context), getAttemptId(context)));
}

From source file:com.netflix.bdp.s3.S3PartitionedOutputCommitter.java

License:Apache License

@Override
protected List<FileStatus> getTaskOutput(TaskAttemptContext context) throws IOException {
    PathFilter filter = HiddenPathFilter.get();

    // get files on the local FS in the attempt path
    Path attemptPath = getTaskAttemptPath(context);
    FileSystem attemptFS = attemptPath.getFileSystem(context.getConfiguration());
    RemoteIterator<LocatedFileStatus> iter = attemptFS.listFiles(attemptPath, true /* recursive */ );

    List<FileStatus> stats = Lists.newArrayList();
    while (iter.hasNext()) {
        FileStatus stat = iter.next();
        if (filter.accept(stat.getPath())) {
            stats.add(stat);
        }
    }

    return stats;
}

From source file:com.netflix.bdp.s3.S3PartitionedOutputCommitter.java

License:Apache License

@Override
public void commitTask(TaskAttemptContext context) throws IOException {
    // these checks run before any files are uploaded to S3, so it is okay for
    // this to throw failures.
    List<FileStatus> taskOutput = getTaskOutput(context);
    Path attemptPath = getTaskAttemptPath(context);
    Configuration conf = context.getConfiguration();
    FileSystem attemptFS = attemptPath.getFileSystem(conf);
    Set<String> partitions = getPartitions(attemptFS, attemptPath, taskOutput);

    // enforce conflict resolution, but only if the mode is FAIL. for APPEND,
    // it doesn't matter that the partitions are already there, and for REPLACE,
    // deletion should be done during task commit.
    if (getMode(context) == ConflictResolution.FAIL) {
        FileSystem s3 = getOutputPath(context).getFileSystem(context.getConfiguration());
        for (String partition : partitions) {
            // getFinalPath adds the UUID to the file name. this needs the parent.
            Path partitionPath = getFinalPath(partition + "/file", context).getParent();
            if (s3.exists(partitionPath)) {
                throw new AlreadyExistsException(
                        "Output partition " + partition + " already exists: " + partitionPath);
            }
        }
    }

    commitTaskInternal(context, taskOutput);
}

From source file:com.neusoft.hbase.test.hadoop.dataload.HFileOutputFormat2.java

License:Apache License

static <V extends Cell> RecordWriter<ImmutableBytesWritable, V> createRecordWriter(
        final TaskAttemptContext context) throws IOException, InterruptedException {
    // Get the path of the temporary output file
    final Path outputPath = FileOutputFormat.getOutputPath(context);
    final Path outputdir = new FileOutputCommitter(outputPath, context).getWorkPath();
    final Configuration conf = context.getConfiguration();
    final FileSystem fs = outputdir.getFileSystem(conf);
    // These configs. are from hbase-*.xml
    final long maxsize = conf.getLong(HConstants.HREGION_MAX_FILESIZE, HConstants.DEFAULT_MAX_FILE_SIZE);
    // Invented config.  Add to hbase-*.xml if other than default compression.
    final String defaultCompression = conf.get("hfile.compression", Compression.Algorithm.NONE.getName());
    final boolean compactionExclude = conf.getBoolean("hbase.mapreduce.hfileoutputformat.compaction.exclude",
            false);

    // create a map from column family to the compression algorithm
    final Map<byte[], String> compressionMap = createFamilyCompressionMap(conf);
    final Map<byte[], String> bloomTypeMap = createFamilyBloomMap(conf);
    final Map<byte[], String> blockSizeMap = createFamilyBlockSizeMap(conf);

    String dataBlockEncodingStr = conf.get(DATABLOCK_ENCODING_CONF_KEY);
    final HFileDataBlockEncoder encoder;
    if (dataBlockEncodingStr == null) {
        encoder = NoOpDataBlockEncoder.INSTANCE;
    } else {
        try {
            encoder = new HFileDataBlockEncoderImpl(DataBlockEncoding.valueOf(dataBlockEncodingStr));
        } catch (IllegalArgumentException ex) {
            throw new RuntimeException("Invalid data block encoding type configured for the param "
                    + DATABLOCK_ENCODING_CONF_KEY + " : " + dataBlockEncodingStr);
        }
    }

    return new RecordWriter<ImmutableBytesWritable, V>() {
        // Map of families to writers and how much has been output on the writer.
        private final Map<byte[], WriterLength> writers = new TreeMap<byte[], WriterLength>(
                Bytes.BYTES_COMPARATOR);
        private byte[] previousRow = HConstants.EMPTY_BYTE_ARRAY;
        private final byte[] now = Bytes.toBytes(System.currentTimeMillis());
        private boolean rollRequested = false;

        public void write(ImmutableBytesWritable row, V cell) throws IOException {
            KeyValue kv = KeyValueUtil.ensureKeyValue(cell);

            // null input == user explicitly wants to flush
            if (row == null && kv == null) {
                rollWriters();
                return;
            }

            byte[] rowKey = kv.getRow();
            long length = kv.getLength();
            byte[] family = kv.getFamily();
            WriterLength wl = this.writers.get(family);

            // If this is a new column family, verify that the directory exists
            if (wl == null) {
                fs.mkdirs(new Path(outputdir, Bytes.toString(family)));
            }

            // If any of the HFiles for the column families has reached
            // maxsize, we need to roll all the writers
            if (wl != null && wl.written + length >= maxsize) {
                this.rollRequested = true;
            }

            // This can only happen once a row is finished though
            if (rollRequested && Bytes.compareTo(this.previousRow, rowKey) != 0) {
                rollWriters();
            }

            // create a new HLog writer, if necessary
            if (wl == null || wl.writer == null) {
                wl = getNewWriter(family, conf);
            }

            // we now have the proper HLog writer. full steam ahead
            kv.updateLatestStamp(this.now);
            wl.writer.append(kv);
            wl.written += length;

            // Copy the row so we know when a row transition happens.
            this.previousRow = rowKey;
        }

        private void rollWriters() throws IOException {
            for (WriterLength wl : this.writers.values()) {
                if (wl.writer != null) {
                    LOG.info("Writer=" + wl.writer.getPath()
                            + ((wl.written == 0) ? "" : ", wrote=" + wl.written));
                    close(wl.writer);
                }
                wl.writer = null;
                wl.written = 0;
            }
            this.rollRequested = false;
        }

        /* Create a new StoreFile.Writer.
         * @param family
         * @return A WriterLength, containing a new StoreFile.Writer.
         * @throws IOException
         */
        private WriterLength getNewWriter(byte[] family, Configuration conf) throws IOException {
            WriterLength wl = new WriterLength();
            Path familydir = new Path(outputdir, Bytes.toString(family));
            String compression = compressionMap.get(family);
            compression = compression == null ? defaultCompression : compression;
            String bloomTypeStr = bloomTypeMap.get(family);
            BloomType bloomType = BloomType.NONE;
            if (bloomTypeStr != null) {
                bloomType = BloomType.valueOf(bloomTypeStr);
            }
            String blockSizeString = blockSizeMap.get(family);
            int blockSize = blockSizeString == null ? HConstants.DEFAULT_BLOCKSIZE
                    : Integer.parseInt(blockSizeString);
            Configuration tempConf = new Configuration(conf);
            tempConf.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0.0f);
            wl.writer = new StoreFile.WriterBuilder(conf, new CacheConfig(tempConf), fs)
                    .withOutputDir(familydir).withBloomType(bloomType).withComparator(KeyValue.COMPARATOR)
                    .build();

            this.writers.put(family, wl);
            return wl;
        }

        private void close(final StoreFile.Writer w) throws IOException {
            if (w != null) {
                w.appendFileInfo(StoreFile.BULKLOAD_TIME_KEY, Bytes.toBytes(System.currentTimeMillis()));
                w.appendFileInfo(StoreFile.BULKLOAD_TASK_KEY,
                        Bytes.toBytes(context.getTaskAttemptID().toString()));
                w.appendFileInfo(StoreFile.MAJOR_COMPACTION_KEY, Bytes.toBytes(true));
                w.appendFileInfo(StoreFile.EXCLUDE_FROM_MINOR_COMPACTION_KEY, Bytes.toBytes(compactionExclude));
                w.appendTrackedTimestampsToMetadata();
                w.close();
            }
        }

        public void close(TaskAttemptContext c) throws IOException, InterruptedException {
            for (WriterLength wl : this.writers.values()) {
                close(wl.writer);
            }
        }
    };
}
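
The configuration keys read above through context.getConfiguration() have to be placed on the job's Configuration by the driver. A minimal sketch using only key names that appear verbatim in the code above; the chosen values are illustrative, not taken from the source:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

public class HFileJobSetup {

    public static void applyWriterSettings(Job job) {
        Configuration conf = job.getConfiguration();

        // Default compression used when no per-family compression mapping is configured.
        conf.set("hfile.compression", "snappy");

        // Mark the produced HFiles to be excluded from minor compactions after the bulk load.
        conf.setBoolean("hbase.mapreduce.hfileoutputformat.compaction.exclude", true);
    }
}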

From source file:com.neusoft.hbase.test.hadoop.dataload.HFileOutputFormatBase.java

License:Apache License

public RecordWriter<ImmutableBytesWritable, KeyValue> getRecordWriter( // cell replaced with KeyValue
        // was: static <V extends Cell> RecordWriter<ImmutableBytesWritable, V> createRecordWriter()
        final TaskAttemptContext context) throws IOException, InterruptedException {

    // Get the path of the temporary output file
    final Path outputPath = FileOutputFormat.getOutputPath(context);
    final Path outputdir = new FileOutputCommitter(outputPath, context).getWorkPath();
    final Path ignoreOutputPath = getDeleteRowKeyFile(outputPath);

    final Configuration conf = context.getConfiguration();
    final FileSystem fs = outputdir.getFileSystem(conf);
    // These configs. are from hbase-*.xml
    final long maxsize = conf.getLong(HConstants.HREGION_MAX_FILESIZE, HConstants.DEFAULT_MAX_FILE_SIZE);
    // Invented config. Add to hbase-*.xml if other than default
    // compression.
    final String defaultCompression = conf.get("hfile.compression", Compression.Algorithm.NONE.getName());
    final boolean compactionExclude = conf.getBoolean("hbase.mapreduce.hfileoutputformat.compaction.exclude",
            false);

    // create a map from column family to the compression algorithm
    final Map<byte[], String> compressionMap = createFamilyCompressionMap(conf);
    final Map<byte[], String> bloomTypeMap = createFamilyBloomMap(conf);
    final Map<byte[], String> blockSizeMap = createFamilyBlockSizeMap(conf);

    String dataBlockEncodingStr = conf.get(DATABLOCK_ENCODING_CONF_KEY);
    final HFileDataBlockEncoder encoder;
    if (dataBlockEncodingStr == null) {
        encoder = NoOpDataBlockEncoder.INSTANCE;
    } else {
        try {
            encoder = new HFileDataBlockEncoderImpl(DataBlockEncoding.valueOf(dataBlockEncodingStr));
        } catch (IllegalArgumentException ex) {
            throw new RuntimeException("Invalid data block encoding type configured for the param "
                    + DATABLOCK_ENCODING_CONF_KEY + " : " + dataBlockEncodingStr);
        }
    }

    return new RecordWriter<ImmutableBytesWritable, KeyValue>() { // V replaced with KeyValue

        // Map of families to writers and how much has been output on the
        // writer.
        private final Map<byte[], WriterLength> writers = new TreeMap<byte[], WriterLength>(
                Bytes.BYTES_COMPARATOR);
        private final FSDataOutputStream dos = fs.create(ignoreOutputPath);
        private byte[] previousRow = HConstants.EMPTY_BYTE_ARRAY;
        private final byte[] now = Bytes.toBytes(System.currentTimeMillis());
        private boolean rollRequested = false;

        public void write(ImmutableBytesWritable row, KeyValue kv) // V cell replaced with KeyValue kv
                throws IOException {
            // KeyValue kv = KeyValueUtil.ensureKeyValue(cell);

            // null input == user explicitly wants to flush
            if (row == null && kv == null) {
                rollWriters();
                return;
            }

            byte[] rowKey = kv.getRow();
            long length = kv.getLength();
            byte[] family = kv.getFamily();

            if (ignore(kv)) {
                byte[] readBuf = rowKey;
                dos.write(readBuf, 0, readBuf.length);
                dos.write(Bytes.toBytes("\n"));
                return;
            }

            WriterLength wl = this.writers.get(family);

            // If this is a new column family, verify that the directory
            // exists
            if (wl == null) {
                fs.mkdirs(new Path(outputdir, Bytes.toString(family)));
            }

            // If any of the HFiles for the column families has reached
            // maxsize, we need to roll all the writers
            if (wl != null && wl.written + length >= maxsize) {
                this.rollRequested = true;
            }

            // This can only happen once a row is finished though
            if (rollRequested && Bytes.compareTo(this.previousRow, rowKey) != 0) {
                rollWriters();
            }

            // create a new HLog writer, if necessary
            if (wl == null || wl.writer == null) {
                wl = getNewWriter(family, conf);
            }

            // we now have the proper HLog writer. full steam ahead
            kv.updateLatestStamp(this.now);
            wl.writer.append(kv);
            wl.written += length;

            // Copy the row so we know when a row transition happens.
            this.previousRow = rowKey;
        }

        private void rollWriters() throws IOException {
            for (WriterLength wl : this.writers.values()) {
                if (wl.writer != null) {
                    LOG.info("Writer=" + wl.writer.getPath()
                            + ((wl.written == 0) ? "" : ", wrote=" + wl.written));
                    close(wl.writer);
                }
                wl.writer = null;
                wl.written = 0;
            }
            this.rollRequested = false;
        }

        /*
         * Create a new StoreFile.Writer.
         * 
         * @param family
         * 
         * @return A WriterLength, containing a new StoreFile.Writer.
         * 
         * @throws IOException
         */
        private WriterLength getNewWriter(byte[] family, Configuration conf) throws IOException {
            WriterLength wl = new WriterLength();
            Path familydir = new Path(outputdir, Bytes.toString(family));
            String compression = compressionMap.get(family);
            compression = compression == null ? defaultCompression : compression;
            String bloomTypeStr = bloomTypeMap.get(family);
            BloomType bloomType = BloomType.NONE;
            if (bloomTypeStr != null) {
                bloomType = BloomType.valueOf(bloomTypeStr);
            }
            String blockSizeString = blockSizeMap.get(family);
            int blockSize = blockSizeString == null ? HConstants.DEFAULT_BLOCKSIZE
                    : Integer.parseInt(blockSizeString);
            Configuration tempConf = new Configuration(conf);
            tempConf.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0.0f);
            wl.writer = new StoreFile.WriterBuilder(conf, new CacheConfig(tempConf), fs)
                    .withOutputDir(familydir).withBloomType(bloomType).withComparator(KeyValue.COMPARATOR)
                    .build();

            this.writers.put(family, wl);
            return wl;
        }

        private void close(final StoreFile.Writer w) throws IOException {
            if (w != null) {
                w.appendFileInfo(StoreFile.BULKLOAD_TIME_KEY, Bytes.toBytes(System.currentTimeMillis()));
                w.appendFileInfo(StoreFile.BULKLOAD_TASK_KEY,
                        Bytes.toBytes(context.getTaskAttemptID().toString()));
                w.appendFileInfo(StoreFile.MAJOR_COMPACTION_KEY, Bytes.toBytes(true));
                w.appendFileInfo(StoreFile.EXCLUDE_FROM_MINOR_COMPACTION_KEY, Bytes.toBytes(compactionExclude));
                w.appendTrackedTimestampsToMetadata();
                w.close();
            }
        }

        public void close(TaskAttemptContext c) throws IOException, InterruptedException {
            dos.flush();
            dos.close();
            for (WriterLength wl : this.writers.values()) {
                close(wl.writer);
            }
        }

    };
}
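
This variant differs from the previous createRecordWriter example mainly by its extra side channel: rows for which ignore(kv) returns true are not written to an HFile at all; instead their row keys are appended, one per line, to a separate file under the output path (obtained via getDeleteRowKeyFile, so presumably row keys scheduled for deletion), and that stream is flushed and closed alongside the family writers in close().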

From source file:com.nikoo28.excel.mapreduce.ExcelRecordReader.java

License:Apache License

@Override
public void initialize(InputSplit genericSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {

    FileSplit split = (FileSplit) genericSplit;
    Configuration job = context.getConfiguration();
    final Path file = split.getPath();

    FileSystem fs = file.getFileSystem(job);
    FSDataInputStream fileIn = fs.open(split.getPath());

    is = fileIn;
    String line = new ExcelParser().parseExcelData(is);
    this.strArrayofLines = line.split("\n");
}
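
A reader like this is normally returned from a matching InputFormat. A hypothetical wiring sketch follows; the ExcelInputFormat class below and the assumption that ExcelRecordReader uses LongWritable/Text as its key and value types are not taken from the source above:

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

public class ExcelInputFormat extends FileInputFormat<LongWritable, Text> {

    @Override
    public RecordReader<LongWritable, Text> createRecordReader(InputSplit split, TaskAttemptContext context) {
        return new ExcelRecordReader();
    }

    @Override
    protected boolean isSplitable(JobContext context, Path file) {
        // The reader parses the whole workbook in initialize(), so the file must not be split.
        return false;
    }
}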

From source file:com.ning.metrics.serialization.hadoop.SmileRecordReader.java

License:Apache License

/**
 * Called once at initialization.
 *
 * @param genericSplit the split that defines the range of records to read
 * @param context      the information about the task
 * @throws java.io.IOException
 * @throws InterruptedException
 */
@Override
public void initialize(InputSplit genericSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {
    FileSplit split = (FileSplit) genericSplit;
    Configuration job = context.getConfiguration();
    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();

    // Open the file and seek to the start of the split
    FileSystem fs = file.getFileSystem(job);
    fileIn = fs.open(split.getPath());
    if (start != 0) {
        --start;
        fileIn.seek(start);
    }

    this.pos = start;
    deserializer = new SmileEnvelopeEventDeserializer(fileIn, false);
}