List of usage examples for org.apache.hadoop.mapreduce.JobContext.getConfiguration()
public Configuration getConfiguration();
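Before the scraped examples, a minimal sketch of the typical pattern: JobContext (and its subinterfaces such as TaskAttemptContext) exposes the job's Configuration, from which an InputFormat or committer can read properties set by the driver. MyInputFormat and the property name "my.custom.allow.splits" are illustrative, not taken from the examples below.

// Minimal sketch (hypothetical class and property name): reading a driver-set
// flag through JobContext.getConfiguration() inside a custom InputFormat.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;

public class MyInputFormat extends TextInputFormat {
    @Override
    protected boolean isSplitable(JobContext context, Path file) {
        // The Configuration carries everything the driver set on the Job.
        Configuration conf = context.getConfiguration();
        // Only allow splitting when the driver enabled it explicitly.
        boolean allowSplits = conf.getBoolean("my.custom.allow.splits", true);
        return allowSplits && super.isSplitable(context, file);
    }
}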
From source file:com.practicalHadoop.outputformat.MultpleDirectories.FileOutputCommitter.java
License:Apache License
/**
 * Create the temporary directory that is the root of all of the task
 * work directories.
 * @param context the job's context
 */
public void setupJob(JobContext context) throws IOException {
    if (outputPath != null) {
        Path tmpDir = new Path(outputPath, FileOutputCommitter.TEMP_DIR_NAME);
        FileSystem fileSys = tmpDir.getFileSystem(context.getConfiguration());
        if (!fileSys.mkdirs(tmpDir)) {
            LOG.error("Mkdirs failed to create " + tmpDir.toString());
        }
    }
}
From source file:com.practicalHadoop.outputformat.MultpleDirectories.FileOutputCommitter.java
License:Apache License
private void markOutputDirSuccessful(JobContext context, String path) throws IOException {
    if (outputPath != null) {
        Path p = (path == null) ? outputPath : new Path(outputPath, path);
        LOG.warn("Mark Output success " + p);
        FileSystem fileSys = outputPath.getFileSystem(context.getConfiguration());
        if (fileSys.exists(outputPath)) {
            // create a file in the folder to mark it
            Path filePath = new Path(p, SUCCEEDED_FILE_NAME);
            fileSys.create(filePath).close();
        }
    }
}
From source file:com.practicalHadoop.outputformat.MultpleDirectories.FileOutputCommitter.java
License:Apache License
/**
 * Delete the temporary directory, including all of the work directories.
 * This is called for all jobs whose final run state is SUCCEEDED.
 * @param context the job's context
 */
public void commitJob(JobContext context) throws IOException {
    // This is invoked once in the cleanup phase; it needs to run for all directories
    pathNames.add(null);
    for (String path : pathNames) {
        LOG.warn("Cleaning up context " + path + " ...");
        // delete the _temporary folder
        cleanupJob(context, path);
        // check if the o/p dir should be marked
        if (shouldMarkOutputDir(context.getConfiguration())) {
            // create a _success file in the o/p folder
            LOG.warn("Marking output directory successful...");
            markOutputDirSuccessful(context, path);
        }
    }
}
From source file:com.practicalHadoop.outputformat.MultpleDirectories.FileOutputCommitter.java
License:Apache License
public void cleanupJob(JobContext context, String path) throws IOException {
    if (outputPath != null) {
        Path p = (path == null) ? outputPath : new Path(outputPath, path);
        Path tmpDir = new Path(p, FileOutputCommitter.TEMP_DIR_NAME);
        LOG.warn("Cleanup on " + tmpDir);
        FileSystem fileSys = tmpDir.getFileSystem(context.getConfiguration());
        if (fileSys.exists(tmpDir)) {
            fileSys.delete(tmpDir, true);
        }
    } else {
        LOG.warn("Output path is null in cleanup");
    }
}
From source file:com.rim.logdriver.mapreduce.boom.BoomInputFormat.java
License:Apache License
public List<InputSplit> getSplits(JobContext context) throws IOException {
    Configuration conf = context.getConfiguration();
    // Ensure we have sensible defaults for how we build blocks.
    if (conf.get("mapreduce.job.max.split.locations") == null) {
        conf.setLong("mapreduce.job.max.split.locations", MAX_SPLIT_LOCATIONS);
    }
    if (conf.get("mapred.max.split.size") == null) {
        // Try to set the split size to the default block size. In case of
        // failure, we'll use this 128MB default.
        long blockSize = 128 * 1024 * 1024; // 128MB
        try {
            blockSize = FileSystem.get(conf).getDefaultBlockSize();
        } catch (IOException e) {
            LOG.error("Error getting filesystem to get default block size (this does not bode well).");
        }
        conf.setLong("mapred.max.split.size", blockSize);
    }
    for (String key : new String[] { "mapreduce.job.max.split.locations", "mapred.max.split.size" }) {
        LOG.info("{} = {}", key, context.getConfiguration().get(key));
    }
    return super.getSplits(context);
}
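For context, a driver can pre-populate the same keys on the job's Configuration before submission, in which case the fallback defaults in getSplits() above never trigger. A minimal sketch, assuming BoomInputFormat is on the classpath; the job name, the location limit of 30, and the 256MB split size are arbitrary illustrative values.

// Hypothetical driver snippet: set the split-related keys up front so the
// InputFormat does not have to choose defaults at getSplits() time.
Configuration conf = new Configuration();
conf.setLong("mapreduce.job.max.split.locations", 30);
conf.setLong("mapred.max.split.size", 256L * 1024 * 1024); // 256MB, arbitrary
Job job = Job.getInstance(conf, "boom-read");
job.setInputFormatClass(BoomInputFormat.class);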
From source file:com.rockstor.compact.mapreduce.CompactDirInputFormat.java
License:Apache License
@Override
public List<InputSplit> getSplits(JobContext context) throws IOException, InterruptedException {
    List<InputSplit> inputSplits = new ArrayList<InputSplit>();
    Compactor.getInstance();
    Configuration conf = context.getConfiguration();
    Path rootPath = new Path(PathUtil.getInstance().getTaskRootDir());
    FileSystem dfs = FileSystem.get(conf);
    if (!dfs.exists(rootPath)) {
        return inputSplits;
    }
    FileStatus[] fs = dfs.listStatus(rootPath);
    if (fs == null || fs.length == 0) {
        return inputSplits;
    }
    InputSplit inputSplit = null;
    String taskIdName = null;
    for (FileStatus f : fs) {
        if (!f.isDir()) {
            continue;
        }
        taskIdName = f.getPath().getName();
        LOG.info("add task id name: " + taskIdName);
        inputSplit = new CompactDirInputSplit(taskIdName);
        inputSplits.add(inputSplit);
    }
    return inputSplits;
}
From source file:com.run.mapred.hbase2tsv.HFileInputFormat_mr1.java
License:Apache License
@Override
protected List<FileStatus> listStatus(JobContext job) throws IOException {
    List<FileStatus> result = new ArrayList<FileStatus>();
    // Explode out directories that match the original FileInputFormat
    // filters since HFiles are written to directories where the
    // directory name is the column name
    for (FileStatus status : super.listStatus(job)) {
        if (status.isDirectory()) {
            FileSystem fs = status.getPath().getFileSystem(job.getConfiguration());
            for (FileStatus match : fs.listStatus(status.getPath(), HIDDEN_FILE_FILTER)) {
                result.add(match);
            }
        } else {
            result.add(status);
        }
    }
    return result;
}
From source file:com.rw.legion.input.JsonInputFormat.java
License:Apache License
@Override
protected boolean isSplitable(JobContext context, Path file) {
    CompressionCodec codec;
    Configuration job = context.getConfiguration();
    legionObjective = ObjectiveDeserializer.deserialize(job.get("legion_objective"));

    if (legionObjective.getCodecOverride() != null) {
        codec = new CompressionCodecFactory(context.getConfiguration())
                .getCodecByClassName(legionObjective.getCodecOverride());
    } else {
        codec = new CompressionCodecFactory(context.getConfiguration()).getCodec(file);
    }

    if (null == codec) {
        return true;
    }

    return codec instanceof SplittableCompressionCodec;
}
From source file:com.scaleoutsoftware.soss.hserver.DatasetInputFormat.java
License:Apache License
/**
 * Gets the logical split of the input dataset. Splits are calculated either by the underlying input format
 * and wrapped with {@link InputSplit}, or retrieved from the image stored in the StateServer.
 */
@Override
public List<InputSplit> getSplits(JobContext context) throws IOException, InterruptedException {
    InputFormat<K, V> underlyingInputFormat = getUnderlyingInputFormat(context.getConfiguration());
    try {
        GridImage image = getImage(underlyingInputFormat.getClass());
        image = image.readOrCreateImage(context, underlyingInputFormat);
        return image.getSplits();
    } catch (StateServerException e) {
        LOG.error("Cannot access ScaleOut StateServer. Falling back to original split.", e);
        return underlyingInputFormat.getSplits(context);
    } catch (ClassNotFoundException e) {
        LOG.error("Image class was not found. Falling back to original split.", e);
        return underlyingInputFormat.getSplits(context);
    }
}
From source file:com.scaleoutsoftware.soss.hserver.FileImage.java
License:Apache License
/**
 * If the file list and input format match but some of the files were appended, this method will calculate
 * additional splits to be recorded. The already recorded part of the dataset is going to be served from the
 * StateServer store.
 */
@Override
public boolean checkCurrent(JobContext context, InputFormat format)
        throws StateServerException, IOException, InterruptedException {
    if (!(format instanceof FileInputFormat))
        throw new IllegalArgumentException("Unexpected InputFormat type " + format.getClass().getName());
    List<FileStatus> newFiles = getFiles(context, (FileInputFormat) format);
    if (newFiles.size() != filesPaths.size())
        return false;
    Collections.sort(newFiles);
    for (FileStatus newFile : newFiles) {
        if (newFile.getModificationTime() != modificationDate.get(newFile.getPath().toString())) {
            if (context.getConfiguration().getBoolean(DatasetInputFormat.enableAppendsPropertyName, false)) {
                addNewSplits(context, format);
                BucketStore bucketStore = BucketStoreFactory.getBucketStore(imageIdString);
                bucketStore.writeImage(this, false);
            } else {
                return false;
            }
        }
    }
    return true;
}