List of usage examples for org.apache.hadoop.mapreduce JobContext getConfiguration
public Configuration getConfiguration();
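Every example below follows the same pattern: a Hadoop component is handed a JobContext and pulls job settings out of it through getConfiguration(). As a minimal sketch before the real-world code, assuming a hypothetical class name CappedTextInputFormat and a hypothetical property key example.max.splits (everything else is standard Hadoop API):

import java.io.IOException;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;

public class CappedTextInputFormat extends TextInputFormat {
    @Override
    public List<InputSplit> getSplits(JobContext job) throws IOException {
        // the JobContext exposes the job's Configuration: Hadoop settings
        // plus any custom properties set at submit time
        Configuration conf = job.getConfiguration();
        int maxSplits = conf.getInt("example.max.splits", Integer.MAX_VALUE); // hypothetical key
        List<InputSplit> splits = super.getSplits(job);
        return splits.size() > maxSplits ? splits.subList(0, maxSplits) : splits;
    }
}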
From source file:com.netflix.aegisthus.input.AegisthusInputFormat.java
License:Apache License
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
    List<InputSplit> splits = Lists.newArrayList();
    List<FileStatus> files = listStatus(job);
    convertors = initConvertors(job);
    for (FileStatus file : files) {
        String name = file.getPath().getName();
        if (name.endsWith("-Data.db")) {
            addSSTableSplit(splits, job, file);
        } else if (name.startsWith("CommitLog")) {
            LOG.info(String.format("adding %s as a CommitLog split", file.getPath().toUri().toString()));
            BlockLocation[] blkLocations = file.getPath().getFileSystem(job.getConfiguration())
                    .getFileBlockLocations(file, 0, file.getLen());
            splits.add(new AegSplit(file.getPath(), 0, file.getLen(), blkLocations[0].getHosts(),
                    Type.commitlog, convertors));
        } else {
            LOG.info(String.format("adding %s as a json split", file.getPath().toUri().toString()));
            BlockLocation[] blkLocations = file.getPath().getFileSystem(job.getConfiguration())
                    .getFileBlockLocations(file, 0, file.getLen());
            splits.add(new AegSplit(file.getPath(), 0, file.getLen(), blkLocations[0].getHosts(), Type.json));
        }
    }
    return splits;
}
From source file:com.netflix.bdp.s3.MockedS3Committer.java
License:Apache License
@Override
public void commitJob(JobContext context) throws IOException {
    super.commitJob(context);
    Configuration conf = context.getConfiguration();
    try {
        String jobCommitterPath = conf.get("mock-results-file");
        if (jobCommitterPath != null) {
            try (ObjectOutputStream out = new ObjectOutputStream(
                    FileSystem.getLocal(conf).create(new Path(jobCommitterPath), false))) {
                out.writeObject(results);
            }
        }
    } catch (Exception e) {
        // do nothing, the test will fail
    }
}
From source file:com.netflix.bdp.s3.S3DirectoryOutputCommitter.java
License:Apache License
@Override
public void setupJob(JobContext context) throws IOException {
    Path outputPath = getOutputPath(context);
    // use the FS implementation because it will check for _$folder$
    FileSystem fs = outputPath.getFileSystem(context.getConfiguration());
    if (fs.exists(outputPath)) {
        switch (getMode(context)) {
        case FAIL:
            throw new AlreadyExistsException("Output path already exists: " + outputPath);
        case APPEND:
        case REPLACE:
            // do nothing.
            // removing the directory, if overwriting, is done in commitJob,
            // in case there is a failure before commit.
        }
    }
    super.setupJob(context);
}
From source file:com.netflix.bdp.s3.S3DirectoryOutputCommitter.java
License:Apache License
@Override
public void commitJob(JobContext context) throws IOException {
    Path outputPath = getOutputPath(context);
    // use the FS implementation because it will check for _$folder$
    FileSystem fs = outputPath.getFileSystem(context.getConfiguration());
    if (fs.exists(outputPath)) {
        switch (getMode(context)) {
        case FAIL:
            // this was checked in setupJob, but this avoids some cases where
            // output was created while the job was processing
            throw new AlreadyExistsException("Output path already exists: " + outputPath);
        case APPEND:
            // do nothing
            break;
        case REPLACE:
            LOG.info("Removing output path to be replaced: " + outputPath);
            if (!fs.delete(outputPath, true /* recursive */)) {
                throw new IOException("Failed to delete existing output directory for replace: " + outputPath);
            }
            break;
        default:
            throw new RuntimeException("Unknown conflict resolution mode: " + getMode(context));
        }
    }
    super.commitJob(context);
}
From source file:com.netflix.bdp.s3.S3MultipartOutputCommitter.java
License:Apache License
public S3MultipartOutputCommitter(Path outputPath, JobContext context) throws IOException {
    super(outputPath, (TaskAttemptContext) context);
    this.constructorOutputPath = outputPath;

    Configuration conf = context.getConfiguration();

    this.uploadPartSize = conf.getLong(S3Committer.UPLOAD_SIZE, S3Committer.DEFAULT_UPLOAD_SIZE);

    // Spark will use a fake app id based on the current minute and job id 0.
    // To avoid collisions, use the YARN application ID for Spark.
    this.uuid = conf.get(S3Committer.UPLOAD_UUID,
            conf.get(S3Committer.SPARK_WRITE_UUID,
                    conf.get(S3Committer.SPARK_APP_ID, context.getJobID().toString())));

    if (context instanceof TaskAttemptContext) {
        this.workPath = taskAttemptPath((TaskAttemptContext) context, uuid);
    } else {
        this.workPath = null;
    }

    this.wrappedCommitter = new FileOutputCommitter(
            Paths.getMultipartUploadCommitsDirectory(conf, uuid), context);
}
From source file:com.netflix.bdp.s3.S3MultipartOutputCommitter.java
License:Apache License
@Override
public void setupJob(JobContext context) throws IOException {
    wrappedCommitter.setupJob(context);
    context.getConfiguration().set(S3Committer.UPLOAD_UUID, uuid);
}
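The single configuration write above is a hand-off: setupJob records the upload UUID so that every later context created for the same job can recover it. The read side, as a sketch (S3Committer.UPLOAD_UUID is the project's own constant; the null check is added here for illustration):

// read side of the hand-off: tasks see the value setupJob stored
String uuid = context.getConfiguration().get(S3Committer.UPLOAD_UUID);
if (uuid == null) {
    throw new IllegalStateException("upload UUID missing: setupJob has not run");
}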
From source file:com.netflix.bdp.s3.S3MultipartOutputCommitter.java
License:Apache License
private List<S3Util.PendingUpload> getPendingUploads(JobContext context, boolean suppressExceptions)
        throws IOException {
    Path jobAttemptPath = wrappedCommitter.getJobAttemptPath(context);
    final FileSystem attemptFS = jobAttemptPath.getFileSystem(context.getConfiguration());
    FileStatus[] pendingCommitFiles = attemptFS.listStatus(jobAttemptPath, HiddenPathFilter.get());

    final List<S3Util.PendingUpload> pending = Lists.newArrayList();

    // try to read every pending file and add all results to pending.
    // in the case of a failure to read the file, exceptions are held until all
    // reads have been attempted.
    Tasks.foreach(pendingCommitFiles)
            .throwFailureWhenFinished(!suppressExceptions)
            .executeWith(getThreadPool(context))
            .run(new Task<FileStatus, IOException>() {
                @Override
                public void run(FileStatus pendingCommitFile) throws IOException {
                    pending.addAll(S3Util.readPendingCommits(attemptFS, pendingCommitFile.getPath()));
                }
            });

    return pending;
}
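The Tasks.foreach(...).throwFailureWhenFinished(...) helper is what "holds" exceptions as the comment describes. Hand-rolled without the project's Tasks utility, the same deferred-failure pattern looks roughly like this (a sketch, serial rather than pooled):

IOException deferred = null;
for (FileStatus pendingCommitFile : pendingCommitFiles) {
    try {
        pending.addAll(S3Util.readPendingCommits(attemptFS, pendingCommitFile.getPath()));
    } catch (IOException e) {
        // hold the failure and keep reading the remaining files
        if (deferred == null) {
            deferred = e;
        } else {
            deferred.addSuppressed(e);
        }
    }
}
if (deferred != null && !suppressExceptions) {
    throw deferred;
}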
From source file:com.netflix.bdp.s3.S3MultipartOutputCommitter.java
License:Apache License
protected void commitJobInternal(JobContext context, List<S3Util.PendingUpload> pending) throws IOException {
    final AmazonS3 client = getClient(getOutputPath(context), context.getConfiguration());

    boolean threw = true;
    try {
        Tasks.foreach(pending)
                .stopOnFailure()
                .throwFailureWhenFinished()
                .executeWith(getThreadPool(context))
                .onFailure(new Tasks.FailureTask<S3Util.PendingUpload, RuntimeException>() {
                    @Override
                    public void run(S3Util.PendingUpload commit, Exception exception) {
                        S3Util.abortCommit(client, commit);
                    }
                })
                .abortWith(new Task<S3Util.PendingUpload, RuntimeException>() {
                    @Override
                    public void run(S3Util.PendingUpload commit) {
                        S3Util.abortCommit(client, commit);
                    }
                })
                .revertWith(new Task<S3Util.PendingUpload, RuntimeException>() {
                    @Override
                    public void run(S3Util.PendingUpload commit) {
                        S3Util.revertCommit(client, commit);
                    }
                })
                .run(new Task<S3Util.PendingUpload, RuntimeException>() {
                    @Override
                    public void run(S3Util.PendingUpload commit) {
                        S3Util.finishCommit(client, commit);
                    }
                });
        threw = false;
    } finally {
        cleanup(context, threw);
    }
}
From source file:com.netflix.bdp.s3.S3MultipartOutputCommitter.java
License:Apache License
protected void abortJobInternal(JobContext context, List<S3Util.PendingUpload> pending,
        boolean suppressExceptions) throws IOException {
    final AmazonS3 client = getClient(getOutputPath(context), context.getConfiguration());

    boolean threw = true;
    try {
        Tasks.foreach(pending)
                .throwFailureWhenFinished(!suppressExceptions)
                .executeWith(getThreadPool(context))
                .onFailure(new Tasks.FailureTask<S3Util.PendingUpload, RuntimeException>() {
                    @Override
                    public void run(S3Util.PendingUpload commit, Exception exception) {
                        S3Util.abortCommit(client, commit);
                    }
                })
                .run(new Task<S3Util.PendingUpload, RuntimeException>() {
                    @Override
                    public void run(S3Util.PendingUpload commit) {
                        S3Util.abortCommit(client, commit);
                    }
                });
        threw = false;
    } finally {
        cleanup(context, threw || suppressExceptions);
    }
}
From source file:com.netflix.bdp.s3.S3MultipartOutputCommitter.java
License:Apache License
/**
 * Returns an {@link ExecutorService} for parallel tasks. The number of
 * threads in the thread-pool is set by s3.multipart.committer.num-threads.
 * If num-threads is 0, this will return null.
 *
 * @param context the JobContext for this commit
 * @return an {@link ExecutorService}, or null if the configured number of threads is 0
 */
protected final ExecutorService getThreadPool(JobContext context) {
    if (threadPool == null) {
        int numThreads = context.getConfiguration().getInt(S3Committer.NUM_THREADS,
                S3Committer.DEFAULT_NUM_THREADS);
        if (numThreads > 0) {
            this.threadPool = Executors.newFixedThreadPool(numThreads,
                    new ThreadFactoryBuilder()
                            .setDaemon(true)
                            .setNameFormat("s3-committer-pool-%d")
                            .build());
        } else {
            return null;
        }
    }
    return threadPool;
}
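Since the pool size is read from the job configuration, it is chosen at submit time. A sketch of setting it when building the job (the key string is the one named in the javadoc above; the rest of the job setup is elided):

Job job = Job.getInstance(new Configuration());
// 0 makes getThreadPool return null, per the javadoc above
job.getConfiguration().setInt("s3.multipart.committer.num-threads", 8);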