List of usage examples for org.apache.hadoop.mapreduce JobContext getConfiguration
public Configuration getConfiguration();
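Every example below follows the same pattern: a Hadoop component is handed a JobContext and pulls job settings out of it through getConfiguration(). As a minimal sketch before the real-world code, assuming a hypothetical class name CappedTextInputFormat and a hypothetical property key example.max.splits (everything else is standard Hadoop API):

import java.io.IOException;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;

public class CappedTextInputFormat extends TextInputFormat {
    @Override
    public List<InputSplit> getSplits(JobContext job) throws IOException {
        // the JobContext exposes the job's Configuration: Hadoop settings
        // plus any custom properties set at submit time
        Configuration conf = job.getConfiguration();
        int maxSplits = conf.getInt("example.max.splits", Integer.MAX_VALUE); // hypothetical key
        List<InputSplit> splits = super.getSplits(job);
        return splits.size() > maxSplits ? splits.subList(0, maxSplits) : splits;
    }
}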
From source file:com.netflix.aegisthus.input.AegisthusInputFormat.java
License:Apache License
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
    List<InputSplit> splits = Lists.newArrayList();
    List<FileStatus> files = listStatus(job);
    convertors = initConvertors(job);
    for (FileStatus file : files) {
        String name = file.getPath().getName();
        if (name.endsWith("-Data.db")) {
            addSSTableSplit(splits, job, file);
        } else if (name.startsWith("CommitLog")) {
            LOG.info(String.format("adding %s as a CommitLog split", file.getPath().toUri().toString()));
            BlockLocation[] blkLocations = file.getPath().getFileSystem(job.getConfiguration())
                    .getFileBlockLocations(file, 0, file.getLen());
            splits.add(new AegSplit(file.getPath(), 0, file.getLen(), blkLocations[0].getHosts(),
                    Type.commitlog, convertors));
        } else {
            LOG.info(String.format("adding %s as a json split", file.getPath().toUri().toString()));
            BlockLocation[] blkLocations = file.getPath().getFileSystem(job.getConfiguration())
                    .getFileBlockLocations(file, 0, file.getLen());
            splits.add(new AegSplit(file.getPath(), 0, file.getLen(), blkLocations[0].getHosts(), Type.json));
        }
    }
    return splits;
}
From source file:com.netflix.bdp.s3.MockedS3Committer.java
License:Apache License
@Override
public void commitJob(JobContext context) throws IOException {
    super.commitJob(context);
    Configuration conf = context.getConfiguration();
    try {
        String jobCommitterPath = conf.get("mock-results-file");
        if (jobCommitterPath != null) {
            try (ObjectOutputStream out = new ObjectOutputStream(
                    FileSystem.getLocal(conf).create(new Path(jobCommitterPath), false))) {
                out.writeObject(results);
            }
        }
    } catch (Exception e) {
        // do nothing, the test will fail
    }
}
From source file:com.netflix.bdp.s3.S3DirectoryOutputCommitter.java
License:Apache License
@Override
public void setupJob(JobContext context) throws IOException {
    Path outputPath = getOutputPath(context);
    // use the FS implementation because it will check for _$folder$
    FileSystem fs = outputPath.getFileSystem(context.getConfiguration());
    if (fs.exists(outputPath)) {
        switch (getMode(context)) {
        case FAIL:
            throw new AlreadyExistsException("Output path already exists: " + outputPath);
        case APPEND:
        case REPLACE:
            // do nothing.
            // removing the directory, if overwriting, is done in commitJob,
            // in case there is a failure before commit.
        }
    }
    super.setupJob(context);
}
From source file:com.netflix.bdp.s3.S3DirectoryOutputCommitter.java
License:Apache License
@Override
public void commitJob(JobContext context) throws IOException {
    Path outputPath = getOutputPath(context);
    // use the FS implementation because it will check for _$folder$
    FileSystem fs = outputPath.getFileSystem(context.getConfiguration());
    if (fs.exists(outputPath)) {
        switch (getMode(context)) {
        case FAIL:
            // this was checked in setupJob, but this avoids some cases where
            // output was created while the job was processing
            throw new AlreadyExistsException("Output path already exists: " + outputPath);
        case APPEND:
            // do nothing
            break;
        case REPLACE:
            LOG.info("Removing output path to be replaced: " + outputPath);
            if (!fs.delete(outputPath, true /* recursive */)) {
                throw new IOException("Failed to delete existing output directory for replace: " + outputPath);
            }
            break;
        default:
            throw new RuntimeException("Unknown conflict resolution mode: " + getMode(context));
        }
    }
    super.commitJob(context);
}
From source file:com.netflix.bdp.s3.S3MultipartOutputCommitter.java
License:Apache License
public S3MultipartOutputCommitter(Path outputPath, JobContext context) throws IOException {
    super(outputPath, (TaskAttemptContext) context);
    this.constructorOutputPath = outputPath;

    Configuration conf = context.getConfiguration();

    this.uploadPartSize = conf.getLong(S3Committer.UPLOAD_SIZE, S3Committer.DEFAULT_UPLOAD_SIZE);

    // Spark will use a fake app id based on the current minute and job id 0.
    // To avoid collisions, use the YARN application ID for Spark.
    this.uuid = conf.get(S3Committer.UPLOAD_UUID,
            conf.get(S3Committer.SPARK_WRITE_UUID,
                    conf.get(S3Committer.SPARK_APP_ID, context.getJobID().toString())));

    if (context instanceof TaskAttemptContext) {
        this.workPath = taskAttemptPath((TaskAttemptContext) context, uuid);
    } else {
        this.workPath = null;
    }

    this.wrappedCommitter = new FileOutputCommitter(
            Paths.getMultipartUploadCommitsDirectory(conf, uuid), context);
}
From source file:com.netflix.bdp.s3.S3MultipartOutputCommitter.java
License:Apache License
@Override
public void setupJob(JobContext context) throws IOException {
    wrappedCommitter.setupJob(context);
    context.getConfiguration().set(S3Committer.UPLOAD_UUID, uuid);
}
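The single configuration write above is a hand-off: setupJob records the upload UUID so that every later context created for the same job can recover it. The read side, as a sketch (S3Committer.UPLOAD_UUID is the project's own constant; the null check is added here for illustration):

// read side of the hand-off: tasks see the value setupJob stored
String uuid = context.getConfiguration().get(S3Committer.UPLOAD_UUID);
if (uuid == null) {
    throw new IllegalStateException("upload UUID missing: setupJob has not run");
}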
From source file:com.netflix.bdp.s3.S3MultipartOutputCommitter.java
License:Apache License
private List<S3Util.PendingUpload> getPendingUploads(JobContext context, boolean suppressExceptions)
        throws IOException {
    Path jobAttemptPath = wrappedCommitter.getJobAttemptPath(context);
    final FileSystem attemptFS = jobAttemptPath.getFileSystem(context.getConfiguration());
    FileStatus[] pendingCommitFiles = attemptFS.listStatus(jobAttemptPath, HiddenPathFilter.get());

    final List<S3Util.PendingUpload> pending = Lists.newArrayList();

    // try to read every pending file and add all results to pending.
    // in the case of a failure to read the file, exceptions are held until all
    // reads have been attempted.
    Tasks.foreach(pendingCommitFiles)
            .throwFailureWhenFinished(!suppressExceptions)
            .executeWith(getThreadPool(context))
            .run(new Task<FileStatus, IOException>() {
                @Override
                public void run(FileStatus pendingCommitFile) throws IOException {
                    pending.addAll(S3Util.readPendingCommits(attemptFS, pendingCommitFile.getPath()));
                }
            });

    return pending;
}
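The Tasks.foreach(...).throwFailureWhenFinished(...) helper is what "holds" exceptions as the comment describes. Hand-rolled without the project's Tasks utility, the same deferred-failure pattern looks roughly like this (a sketch, serial rather than pooled):

IOException deferred = null;
for (FileStatus pendingCommitFile : pendingCommitFiles) {
    try {
        pending.addAll(S3Util.readPendingCommits(attemptFS, pendingCommitFile.getPath()));
    } catch (IOException e) {
        // hold the failure and keep reading the remaining files
        if (deferred == null) {
            deferred = e;
        } else {
            deferred.addSuppressed(e);
        }
    }
}
if (deferred != null && !suppressExceptions) {
    throw deferred;
}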
From source file:com.netflix.bdp.s3.S3MultipartOutputCommitter.java
License:Apache License
protected void commitJobInternal(JobContext context, List<S3Util.PendingUpload> pending) throws IOException {
    final AmazonS3 client = getClient(getOutputPath(context), context.getConfiguration());

    boolean threw = true;
    try {
        Tasks.foreach(pending)
                .stopOnFailure()
                .throwFailureWhenFinished()
                .executeWith(getThreadPool(context))
                .onFailure(new Tasks.FailureTask<S3Util.PendingUpload, RuntimeException>() {
                    @Override
                    public void run(S3Util.PendingUpload commit, Exception exception) {
                        S3Util.abortCommit(client, commit);
                    }
                })
                .abortWith(new Task<S3Util.PendingUpload, RuntimeException>() {
                    @Override
                    public void run(S3Util.PendingUpload commit) {
                        S3Util.abortCommit(client, commit);
                    }
                })
                .revertWith(new Task<S3Util.PendingUpload, RuntimeException>() {
                    @Override
                    public void run(S3Util.PendingUpload commit) {
                        S3Util.revertCommit(client, commit);
                    }
                })
                .run(new Task<S3Util.PendingUpload, RuntimeException>() {
                    @Override
                    public void run(S3Util.PendingUpload commit) {
                        S3Util.finishCommit(client, commit);
                    }
                });
        threw = false;
    } finally {
        cleanup(context, threw);
    }
}
From source file:com.netflix.bdp.s3.S3MultipartOutputCommitter.java
License:Apache License
protected void abortJobInternal(JobContext context, List<S3Util.PendingUpload> pending,
        boolean suppressExceptions) throws IOException {
    final AmazonS3 client = getClient(getOutputPath(context), context.getConfiguration());

    boolean threw = true;
    try {
        Tasks.foreach(pending)
                .throwFailureWhenFinished(!suppressExceptions)
                .executeWith(getThreadPool(context))
                .onFailure(new Tasks.FailureTask<S3Util.PendingUpload, RuntimeException>() {
                    @Override
                    public void run(S3Util.PendingUpload commit, Exception exception) {
                        S3Util.abortCommit(client, commit);
                    }
                })
                .run(new Task<S3Util.PendingUpload, RuntimeException>() {
                    @Override
                    public void run(S3Util.PendingUpload commit) {
                        S3Util.abortCommit(client, commit);
                    }
                });
        threw = false;
    } finally {
        cleanup(context, threw || suppressExceptions);
    }
}
From source file:com.netflix.bdp.s3.S3MultipartOutputCommitter.java
License:Apache License
/**
 * Returns an {@link ExecutorService} for parallel tasks. The number of
 * threads in the thread-pool is set by s3.multipart.committer.num-threads.
 * If num-threads is 0, this will return null.
 *
 * @param context the JobContext for this commit
 * @return an {@link ExecutorService}, or null if the configured number of threads is 0
 */
protected final ExecutorService getThreadPool(JobContext context) {
    if (threadPool == null) {
        int numThreads = context.getConfiguration().getInt(S3Committer.NUM_THREADS,
                S3Committer.DEFAULT_NUM_THREADS);
        if (numThreads > 0) {
            this.threadPool = Executors.newFixedThreadPool(numThreads,
                    new ThreadFactoryBuilder()
                            .setDaemon(true)
                            .setNameFormat("s3-committer-pool-%d")
                            .build());
        } else {
            return null;
        }
    }
    return threadPool;
}
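Since the pool size is read from the job configuration, it is chosen at submit time. A sketch of setting it when building the job (the key string is the one named in the javadoc above; the rest of the job setup is elided):

Job job = Job.getInstance(new Configuration());
// 0 makes getThreadPool return null, per the javadoc above
job.getConfiguration().setInt("s3.multipart.committer.num-threads", 8);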