Example usage for org.apache.hadoop.mapreduce JobContext getConfiguration

Introduction

This page collects example usages of org.apache.hadoop.mapreduce.JobContext#getConfiguration() drawn from open-source projects.

Prototype

public Configuration getConfiguration();

Document

Return the configuration for the job.
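
For orientation before the real-world usages below, here is a minimal sketch of the basic pattern (the class name and property key are illustrative, not taken from the sources below): fetch the job's Configuration from the JobContext and read a value from it.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.JobContext;

public class GetConfigurationSketch {
    // "example.input.codec" is a made-up key, for illustration only.
    public static String readCodec(JobContext context) {
        // getConfiguration() returns the Configuration backing this job.
        Configuration conf = context.getConfiguration();
        // Read a string property with a default fallback.
        return conf.get("example.input.codec", "none");
    }
}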

Usage

From source file: com.netflix.aegisthus.input.AegisthusInputFormat.java

License: Apache License

@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
    List<InputSplit> splits = Lists.newArrayList();
    List<FileStatus> files = listStatus(job);
    convertors = initConvertors(job);
    for (FileStatus file : files) {
        String name = file.getPath().getName();
        if (name.endsWith("-Data.db")) {
            addSSTableSplit(splits, job, file);
        } else if (name.startsWith("CommitLog")) {
            LOG.info(String.format("adding %s as a CommitLog split", file.getPath().toUri().toString()));
            BlockLocation[] blkLocations = file.getPath().getFileSystem(job.getConfiguration())
                    .getFileBlockLocations(file, 0, file.getLen());
            splits.add(new AegSplit(file.getPath(), 0, file.getLen(), blkLocations[0].getHosts(),
                    Type.commitlog, convertors));
        } else {
            LOG.info(String.format("adding %s as a json split", file.getPath().toUri().toString()));
            BlockLocation[] blkLocations = file.getPath().getFileSystem(job.getConfiguration())
                    .getFileBlockLocations(file, 0, file.getLen());
            splits.add(new AegSplit(file.getPath(), 0, file.getLen(), blkLocations[0].getHosts(), Type.json));
        }
    }
    return splits;
}
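
The recurring move in this and several of the following examples is resolving a FileSystem from the job's configuration. Isolated into a minimal sketch (the method name is illustrative):

import java.io.IOException;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.JobContext;

public class FileSystemSketch {
    // Resolve the FileSystem that owns a path, using the job's configuration
    // (which carries fs.* settings such as the default filesystem URI).
    public static FileSystem resolve(Path path, JobContext context) throws IOException {
        return path.getFileSystem(context.getConfiguration());
    }
}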

From source file: com.netflix.bdp.s3.MockedS3Committer.java

License: Apache License

@Override
public void commitJob(JobContext context) throws IOException {
    super.commitJob(context);
    Configuration conf = context.getConfiguration();
    try {
        String jobCommitterPath = conf.get("mock-results-file");
        if (jobCommitterPath != null) {
            try (ObjectOutputStream out = new ObjectOutputStream(
                    FileSystem.getLocal(conf).create(new Path(jobCommitterPath), false))) {
                out.writeObject(results);
            }
        }
    } catch (Exception e) {
        // do nothing, the test will fail
    }
}

From source file: com.netflix.bdp.s3.S3DirectoryOutputCommitter.java

License: Apache License

@Override
public void setupJob(JobContext context) throws IOException {
    Path outputPath = getOutputPath(context);
    // use the FS implementation because it will check for _$folder$
    FileSystem fs = outputPath.getFileSystem(context.getConfiguration());
    if (fs.exists(outputPath)) {
        switch (getMode(context)) {
        case FAIL:
            throw new AlreadyExistsException("Output path already exists: " + outputPath);
        case APPEND:
        case REPLACE:
            // do nothing here: for REPLACE, deletion of the existing directory
            // is deferred to commitJob, in case the job fails before commit.
        }
    }

    super.setupJob(context);
}

From source file: com.netflix.bdp.s3.S3DirectoryOutputCommitter.java

License: Apache License

@Override
public void commitJob(JobContext context) throws IOException {
    Path outputPath = getOutputPath(context);
    // use the FS implementation because it will check for _$folder$
    FileSystem fs = outputPath.getFileSystem(context.getConfiguration());
    if (fs.exists(outputPath)) {
        switch (getMode(context)) {
        case FAIL:
            // this was checked in setupJob, but this avoids some cases where
            // output was created while the job was processing
            throw new AlreadyExistsException("Output path already exists: " + outputPath);
        case APPEND:
            // do nothing
            break;
        case REPLACE:
            LOG.info("Removing output path to be replaced: " + outputPath);
            if (!fs.delete(outputPath, true /* recursive */)) {
                throw new IOException("Failed to delete existing output directory for replace: " + outputPath);
            }
            break;
        default:
            throw new RuntimeException("Unknown conflict resolution mode: " + getMode(context));
        }
    }

    super.commitJob(context);
}

From source file: com.netflix.bdp.s3.S3MultipartOutputCommitter.java

License: Apache License

public S3MultipartOutputCommitter(Path outputPath, JobContext context) throws IOException {
    super(outputPath, (TaskAttemptContext) context);
    this.constructorOutputPath = outputPath;

    Configuration conf = context.getConfiguration();

    this.uploadPartSize = conf.getLong(S3Committer.UPLOAD_SIZE, S3Committer.DEFAULT_UPLOAD_SIZE);
    // Spark will use a fake app id based on the current minute and job id 0.
    // To avoid collisions, use the YARN application ID for Spark.
    this.uuid = conf.get(S3Committer.UPLOAD_UUID, conf.get(S3Committer.SPARK_WRITE_UUID,
            conf.get(S3Committer.SPARK_APP_ID, context.getJobID().toString())));

    if (context instanceof TaskAttemptContext) {
        this.workPath = taskAttemptPath((TaskAttemptContext) context, uuid);
    } else {
        this.workPath = null;
    }

    this.wrappedCommitter = new FileOutputCommitter(Paths.getMultipartUploadCommitsDirectory(conf, uuid),
            context);
}

From source file: com.netflix.bdp.s3.S3MultipartOutputCommitter.java

License: Apache License

@Override
public void setupJob(JobContext context) throws IOException {
    wrappedCommitter.setupJob(context);
    context.getConfiguration().set(S3Committer.UPLOAD_UUID, uuid);
}

From source file: com.netflix.bdp.s3.S3MultipartOutputCommitter.java

License: Apache License

private List<S3Util.PendingUpload> getPendingUploads(JobContext context, boolean suppressExceptions)
        throws IOException {
    Path jobAttemptPath = wrappedCommitter.getJobAttemptPath(context);
    final FileSystem attemptFS = jobAttemptPath.getFileSystem(context.getConfiguration());
    FileStatus[] pendingCommitFiles = attemptFS.listStatus(jobAttemptPath, HiddenPathFilter.get());

    final List<S3Util.PendingUpload> pending = Lists.newArrayList();

    // try to read every pending file and add all results to pending.
    // in the case of a failure to read the file, exceptions are held until all
    // reads have been attempted.
    Tasks.foreach(pendingCommitFiles).throwFailureWhenFinished(!suppressExceptions)
            .executeWith(getThreadPool(context)).run(new Task<FileStatus, IOException>() {
                @Override
                public void run(FileStatus pendingCommitFile) throws IOException {
                    pending.addAll(S3Util.readPendingCommits(attemptFS, pendingCommitFile.getPath()));
                }
            });

    return pending;
}

From source file: com.netflix.bdp.s3.S3MultipartOutputCommitter.java

License: Apache License

protected void commitJobInternal(JobContext context, List<S3Util.PendingUpload> pending) throws IOException {
    final AmazonS3 client = getClient(getOutputPath(context), context.getConfiguration());

    boolean threw = true;
    try {
        Tasks.foreach(pending).stopOnFailure().throwFailureWhenFinished().executeWith(getThreadPool(context))
                .onFailure(new Tasks.FailureTask<S3Util.PendingUpload, RuntimeException>() {
                    @Override
                    public void run(S3Util.PendingUpload commit, Exception exception) {
                        S3Util.abortCommit(client, commit);
                    }
                }).abortWith(new Task<S3Util.PendingUpload, RuntimeException>() {
                    @Override
                    public void run(S3Util.PendingUpload commit) {
                        S3Util.abortCommit(client, commit);
                    }
                }).revertWith(new Task<S3Util.PendingUpload, RuntimeException>() {
                    @Override
                    public void run(S3Util.PendingUpload commit) {
                        S3Util.revertCommit(client, commit);
                    }
                }).run(new Task<S3Util.PendingUpload, RuntimeException>() {
                    @Override
                    public void run(S3Util.PendingUpload commit) {
                        S3Util.finishCommit(client, commit);
                    }
                });

        threw = false;

    } finally {
        cleanup(context, threw);
    }
}

From source file: com.netflix.bdp.s3.S3MultipartOutputCommitter.java

License: Apache License

protected void abortJobInternal(JobContext context, List<S3Util.PendingUpload> pending,
        boolean suppressExceptions) throws IOException {
    final AmazonS3 client = getClient(getOutputPath(context), context.getConfiguration());

    boolean threw = true;
    try {
        Tasks.foreach(pending).throwFailureWhenFinished(!suppressExceptions).executeWith(getThreadPool(context))
                .onFailure(new Tasks.FailureTask<S3Util.PendingUpload, RuntimeException>() {
                    @Override
                    public void run(S3Util.PendingUpload commit, Exception exception) {
                        S3Util.abortCommit(client, commit);
                    }
                }).run(new Task<S3Util.PendingUpload, RuntimeException>() {
                    @Override
                    public void run(S3Util.PendingUpload commit) {
                        S3Util.abortCommit(client, commit);
                    }
                });

        threw = false;

    } finally {
        cleanup(context, threw || suppressExceptions);
    }
}

From source file: com.netflix.bdp.s3.S3MultipartOutputCommitter.java

License: Apache License

/**
 * Returns an {@link ExecutorService} for parallel tasks. The number of
 * threads in the thread pool is set by s3.multipart.committer.num-threads.
 * If num-threads is 0, this returns null.
 *
 * @param context the JobContext for this commit
 * @return an {@link ExecutorService}, or null if num-threads is 0
 */
protected final ExecutorService getThreadPool(JobContext context) {
    if (threadPool == null) {
        int numThreads = context.getConfiguration().getInt(S3Committer.NUM_THREADS,
                S3Committer.DEFAULT_NUM_THREADS);
        if (numThreads > 0) {
            this.threadPool = Executors.newFixedThreadPool(numThreads,
                    new ThreadFactoryBuilder().setDaemon(true).setNameFormat("s3-committer-pool-%d").build());
        } else {
            return null;
        }
    }
    return threadPool;
}
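
Since the pool size is read from the job configuration, callers control it at job-setup time. A minimal sketch (the property name comes from the Javadoc above; the class and surrounding setup are illustrative):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

public class CommitterThreadsSketch {
    public static void configure(Job job) {
        Configuration conf = job.getConfiguration();
        // Use 8 commit threads; 0 would disable the pool
        // (getThreadPool above then returns null).
        conf.setInt("s3.multipart.committer.num-threads", 8);
    }
}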