Example usage for org.apache.hadoop.mapreduce JobContext getConfiguration

Introduction

This page collects usage examples for org.apache.hadoop.mapreduce.JobContext.getConfiguration().

Prototype

public Configuration getConfiguration();

Document

Return the configuration for the job.
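
Before the real-world examples below, here is a minimal sketch of the typical pattern: an InputFormat subclass asks the JobContext for the job's Configuration and reads a setting from it. The class name and the "example.min.records" property are invented for illustration.

import java.io.IOException;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;

public class ConfiguredTextInputFormat extends TextInputFormat {

    @Override
    public List<InputSplit> getSplits(JobContext context) throws IOException {
        // JobContext.getConfiguration() returns the job's Configuration.
        Configuration conf = context.getConfiguration();

        // Read a hypothetical property with a default value.
        int minRecords = conf.getInt("example.min.records", 1);
        System.out.println("example.min.records = " + minRecords);

        // Delegate the actual split calculation to the parent class.
        return super.getSplits(context);
    }
}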

Usage

From source file:com.practicalHadoop.outputformat.MultpleDirectories.FileOutputCommitter.java

License:Apache License

/**
 * Create the temporary directory that is the root of all of the task 
 * work directories.
 * @param context the job's context
 */
public void setupJob(JobContext context) throws IOException {
    if (outputPath != null) {
        Path tmpDir = new Path(outputPath, FileOutputCommitter.TEMP_DIR_NAME);
        FileSystem fileSys = tmpDir.getFileSystem(context.getConfiguration());
        if (!fileSys.mkdirs(tmpDir)) {
            LOG.error("Mkdirs failed to create " + tmpDir.toString());
        }
    }
}

From source file:com.practicalHadoop.outputformat.MultpleDirectories.FileOutputCommitter.java

License:Apache License

private void markOutputDirSuccessful(JobContext context, String path) throws IOException {
    if (outputPath != null) {
        Path p = (path == null) ? outputPath : new Path(outputPath, path);
        LOG.warn("Mark Output success " + p);
        FileSystem fileSys = outputPath.getFileSystem(context.getConfiguration());
        if (fileSys.exists(outputPath)) {
            // create a file in the folder to mark it
            Path filePath = new Path(p, SUCCEEDED_FILE_NAME);
            fileSys.create(filePath).close();
        }
    }
}

From source file:com.practicalHadoop.outputformat.MultpleDirectories.FileOutputCommitter.java

License:Apache License

/**
 * Delete the temporary directory, including all of the work directories.
 * This is called for all jobs whose final run state is SUCCEEDED.
 * @param context the job's context.
 */
public void commitJob(JobContext context) throws IOException {
    // Invoked once in the cleanup phase; it needs to run for all directories.
    pathNames.add(null);
    for (String path : pathNames) {
        LOG.warn("Cleaning up context " + path + " ...");
        // delete the _temporary folder
        cleanupJob(context, path);
        // check if the o/p dir should be marked
        if (shouldMarkOutputDir(context.getConfiguration())) {
            // create a _success file in the o/p folder
            LOG.warn("Marking output directory successful...");
            markOutputDirSuccessful(context, path);
        }
    }
}
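
A side note on the marker file: in stock Hadoop, SUCCEEDED_FILE_NAME is "_SUCCESS", and the decision made by shouldMarkOutputDir(...) is typically driven by the "mapreduce.fileoutputcommitter.marksuccessfuljobs" flag. Since that helper's body is not shown here, the following driver-side sketch is an assumption:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

public class MarkerToggleDriver {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Disable the _SUCCESS marker (assuming the committer honors the standard flag).
        conf.setBoolean("mapreduce.fileoutputcommitter.marksuccessfuljobs", false);
        Job job = Job.getInstance(conf, "no-success-marker");
        // ... configure formats and paths, then submit as usual.
    }
}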

From source file:com.practicalHadoop.outputformat.MultpleDirectories.FileOutputCommitter.java

License:Apache License

public void cleanupJob(JobContext context, String path) throws IOException {
    if (outputPath != null) {
        Path p = (path == null) ? outputPath : new Path(outputPath, path);
        Path tmpDir = new Path(p, FileOutputCommitter.TEMP_DIR_NAME);
        LOG.warn("Cleanup on " + tmpDir);
        FileSystem fileSys = tmpDir.getFileSystem(context.getConfiguration());
        if (fileSys.exists(tmpDir)) {
            fileSys.delete(tmpDir, true);
        }
    } else {
        LOG.warn("Output path is null in cleanup");
    }
}

From source file:com.rim.logdriver.mapreduce.boom.BoomInputFormat.java

License:Apache License

public List<InputSplit> getSplits(JobContext context) throws IOException {
    Configuration conf = context.getConfiguration();
    // Ensure we have sensible defaults for how we build blocks.
    if (conf.get("mapreduce.job.max.split.locations") == null) {
        conf.setLong("mapreduce.job.max.split.locations", MAX_SPLIT_LOCATIONS);
    }
    if (conf.get("mapred.max.split.size") == null) {
        // Try to set the split size to the default block size. In case of
        // failure, we'll use this 128MB default.
        long blockSize = 128 * 1024 * 1024; // 128MB
        try {
            blockSize = FileSystem.get(conf).getDefaultBlockSize();
        } catch (IOException e) {
            LOG.error("Error getting filesystem to get get default block size (this does not bode well).");
        }
        conf.setLong("mapred.max.split.size", blockSize);
    }
    for (String key : new String[] { "mapreduce.job.max.split.locations", "mapred.max.split.size" }) {
        LOG.info("{} = {}", key, context.getConfiguration().get(key));
    }

    return super.getSplits(context);
}
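
A note on the property names above: "mapred.max.split.size" is the pre-Hadoop-2 key; on Hadoop 2 and later the same limit is exposed as "mapreduce.input.fileinputformat.split.maxsize" and is settable through FileInputFormat. A driver-side sketch with illustrative values:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

public class SplitSizeDriver {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "split-size-example");
        // Same effect as conf.setLong("mapreduce.input.fileinputformat.split.maxsize", ...).
        FileInputFormat.setMaxInputSplitSize(job, 128L * 1024 * 1024); // 128 MB
    }
}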

From source file:com.rockstor.compact.mapreduce.CompactDirInputFormat.java

License:Apache License

@Override
public List<InputSplit> getSplits(JobContext context) throws IOException, InterruptedException {
    List<InputSplit> inputSplits = new ArrayList<InputSplit>();
    Compactor.getInstance();
    Configuration conf = context.getConfiguration();
    Path rootPath = new Path(PathUtil.getInstance().getTaskRootDir());
    FileSystem dfs = FileSystem.get(conf);

    if (!dfs.exists(rootPath)) {
        return inputSplits;
    }

    FileStatus[] fs = dfs.listStatus(rootPath);
    if (fs == null || fs.length == 0) {
        return inputSplits;
    }

    InputSplit inputSplit = null;
    String taskIdName = null;
    for (FileStatus f : fs) {
        if (!f.isDir()) {
            continue;
        }
        taskIdName = f.getPath().getName();
        LOG.info("add task id name: " + taskIdName);
        inputSplit = new CompactDirInputSplit(taskIdName);
        inputSplits.add(inputSplit);
    }

    return inputSplits;
}

From source file:com.run.mapred.hbase2tsv.HFileInputFormat_mr1.java

License:Apache License

@Override
protected List<FileStatus> listStatus(JobContext job) throws IOException {
    List<FileStatus> result = new ArrayList<FileStatus>();

    // Explode out directories that match the original FileInputFormat
    // filters since HFiles are written to directories where the
    // directory name is the column name
    for (FileStatus status : super.listStatus(job)) {
        if (status.isDirectory()) {
            FileSystem fs = status.getPath().getFileSystem(job.getConfiguration());
            for (FileStatus match : fs.listStatus(status.getPath(), HIDDEN_FILE_FILTER)) {
                result.add(match);
            }
        } else {
            result.add(status);
        }
    }

    return result;
}
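
HIDDEN_FILE_FILTER is a field of the class whose definition is not part of this excerpt; a typical implementation skips files whose names begin with "." or "_", so the following body is an assumption:

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;

static final PathFilter HIDDEN_FILE_FILTER = new PathFilter() {
    @Override
    public boolean accept(Path p) {
        String name = p.getName();
        return !name.startsWith(".") && !name.startsWith("_");
    }
};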

From source file:com.rw.legion.input.JsonInputFormat.java

License:Apache License

@Override
protected boolean isSplitable(JobContext context, Path file) {
    CompressionCodec codec;
    Configuration job = context.getConfiguration();
    legionObjective = ObjectiveDeserializer.deserialize(job.get("legion_objective"));

    if (legionObjective.getCodecOverride() != null) {
        codec = new CompressionCodecFactory(context.getConfiguration())
                .getCodecByClassName(legionObjective.getCodecOverride());
    } else {
        codec = new CompressionCodecFactory(context.getConfiguration()).getCodec(file);
    }

    if (null == codec) {
        return true;
    }

    return codec instanceof SplittableCompressionCodec;
}
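
The deserialization above implies that the job driver stored a serialized objective under the "legion_objective" key before submission. A hypothetical driver-side counterpart (the JSON payload is invented):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

public class LegionDriver {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Illustrative payload; the real objective format is defined by the Legion project.
        conf.set("legion_objective", "{\"codecOverride\":null}");
        Job job = Job.getInstance(conf, "legion-json-example");
        // ... set JsonInputFormat, paths, and submit.
    }
}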

From source file:com.scaleoutsoftware.soss.hserver.DatasetInputFormat.java

License:Apache License

/**
 * Gets the logical splits of the input dataset. Splits are either calculated by the underlying
 * input format and wrapped with {@link InputSplit}, or retrieved from the image stored in the
 * StateServer.
 */
@Override
public List<InputSplit> getSplits(JobContext context) throws IOException, InterruptedException {

    InputFormat<K, V> underlyingInputFormat = getUnderlyingInputFormat(context.getConfiguration());

    try {
        GridImage image = getImage(underlyingInputFormat.getClass());
        image = image.readOrCreateImage(context, underlyingInputFormat);
        return image.getSplits();
    } catch (StateServerException e) {
        LOG.error("Cannot access ScaleOut StateServer. Falling back to original split.", e);
        return underlyingInputFormat.getSplits(context);
    } catch (ClassNotFoundException e) {
        LOG.error("Image class was not found. Falling back to original split.", e);
        return underlyingInputFormat.getSplits(context);
    }
}

From source file:com.scaleoutsoftware.soss.hserver.FileImage.java

License:Apache License

/**
 * If the file list and input format match but some of the files were appended, this method
 * calculates the additional splits to be recorded. The already recorded part of the dataset is
 * served from the StateServer store.
 */
@Override
public boolean checkCurrent(JobContext context, InputFormat format)
        throws StateServerException, IOException, InterruptedException {
    if (!(format instanceof FileInputFormat))
        throw new IllegalArgumentException("Unexpected InputFormat type " + format.getClass().getName());
    List<FileStatus> newFiles = getFiles(context, (FileInputFormat) format);
    if (newFiles.size() != filesPaths.size())
        return false;
    Collections.sort(newFiles);
    for (FileStatus newFile : newFiles) {
        if (newFile.getModificationTime() != modificationDate.get(newFile.getPath().toString())) {
            if (context.getConfiguration().getBoolean(DatasetInputFormat.enableAppendsPropertyName, false)) {
                addNewSplits(context, format);
                BucketStore bucketStore = BucketStoreFactory.getBucketStore(imageIdString);
                bucketStore.writeImage(this, false);
            } else {
                return false;
            }
        }
    }
    return true;
}
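
The append path above is gated on DatasetInputFormat.enableAppendsPropertyName. Assuming that constant is publicly visible (its string value is not shown in the excerpt), a driver could enable append handling like this:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

import com.scaleoutsoftware.soss.hserver.DatasetInputFormat;

public class AppendsDriver {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Turn on detection of appended files, as read in checkCurrent() above.
        conf.setBoolean(DatasetInputFormat.enableAppendsPropertyName, true);
        Job job = Job.getInstance(conf, "appends-enabled");
        // ... configure DatasetInputFormat as the input format and submit.
    }
}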