Example usage for org.apache.hadoop.mapreduce JobContext getConfiguration

Introduction

This page collects usage examples for org.apache.hadoop.mapreduce.JobContext.getConfiguration().

Prototype

public Configuration getConfiguration();

Document

Return the configuration for the job.
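
Before the real-world examples below, here is a minimal sketch of the typical pattern: an InputFormat subclass asks the JobContext for the job's Configuration and reads a setting from it. The class name and the "example.min.records" property are invented for illustration.

import java.io.IOException;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;

public class ConfiguredTextInputFormat extends TextInputFormat {

    @Override
    public List<InputSplit> getSplits(JobContext context) throws IOException {
        // JobContext.getConfiguration() returns the job's Configuration.
        Configuration conf = context.getConfiguration();

        // Read a hypothetical property with a default value.
        int minRecords = conf.getInt("example.min.records", 1);
        System.out.println("example.min.records = " + minRecords);

        // Delegate the actual split calculation to the parent class.
        return super.getSplits(context);
    }
}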

Usage

From source file:com.practicalHadoop.outputformat.MultpleDirectories.FileOutputCommitter.java

License:Apache License

/**
 * Create the temporary directory that is the root of all of the task 
 * work directories.
 * @param context the job's context
 */
public void setupJob(JobContext context) throws IOException {
    if (outputPath != null) {
        Path tmpDir = new Path(outputPath, FileOutputCommitter.TEMP_DIR_NAME);
        FileSystem fileSys = tmpDir.getFileSystem(context.getConfiguration());
        if (!fileSys.mkdirs(tmpDir)) {
            LOG.error("Mkdirs failed to create " + tmpDir.toString());
        }
    }
}

From source file:com.practicalHadoop.outputformat.MultpleDirectories.FileOutputCommitter.java

License:Apache License

private void markOutputDirSuccessful(JobContext context, String path) throws IOException {
    if (outputPath != null) {
        Path p = (path == null) ? outputPath : new Path(outputPath, path);
        LOG.warn("Mark Output success " + p);
        FileSystem fileSys = outputPath.getFileSystem(context.getConfiguration());
        if (fileSys.exists(outputPath)) {
            // create a file in the folder to mark it
            Path filePath = new Path(p, SUCCEEDED_FILE_NAME);
            fileSys.create(filePath).close();
        }
    }
}

From source file:com.practicalHadoop.outputformat.MultpleDirectories.FileOutputCommitter.java

License:Apache License

/**
 * Delete the temporary directory, including all of the work directories.
 * This is called for all jobs whose final run state is SUCCEEDED.
 * @param context the job's context.
 */
public void commitJob(JobContext context) throws IOException {
    // Invoked once in the cleanup phase; it needs to run for all directories.
    pathNames.add(null);
    for (String path : pathNames) {
        LOG.warn("Cleaning up context " + path + " ...");
        // delete the _temporary folder
        cleanupJob(context, path);
        // check if the o/p dir should be marked
        if (shouldMarkOutputDir(context.getConfiguration())) {
            // create a _success file in the o/p folder
            LOG.warn("Marking output directory successful...");
            markOutputDirSuccessful(context, path);
        }
    }
}
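
A side note on the marker file: in stock Hadoop, SUCCEEDED_FILE_NAME is "_SUCCESS", and the decision made by shouldMarkOutputDir(...) is typically driven by the "mapreduce.fileoutputcommitter.marksuccessfuljobs" flag. Since that helper's body is not shown here, the following driver-side sketch is an assumption:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

public class MarkerToggleDriver {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Disable the _SUCCESS marker (assuming the committer honors the standard flag).
        conf.setBoolean("mapreduce.fileoutputcommitter.marksuccessfuljobs", false);
        Job job = Job.getInstance(conf, "no-success-marker");
        // ... configure formats and paths, then submit as usual.
    }
}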

From source file:com.practicalHadoop.outputformat.MultpleDirectories.FileOutputCommitter.java

License:Apache License

public void cleanupJob(JobContext context, String path) throws IOException {
    if (outputPath != null) {
        Path p = (path == null) ? outputPath : new Path(outputPath, path);
        Path tmpDir = new Path(p, FileOutputCommitter.TEMP_DIR_NAME);
        LOG.warn("Cleanup on " + tmpDir);
        FileSystem fileSys = tmpDir.getFileSystem(context.getConfiguration());
        if (fileSys.exists(tmpDir)) {
            fileSys.delete(tmpDir, true);
        }
    } else {
        LOG.warn("Output path is null in cleanup");
    }
}

From source file:com.rim.logdriver.mapreduce.boom.BoomInputFormat.java

License:Apache License

public List<InputSplit> getSplits(JobContext context) throws IOException {
    Configuration conf = context.getConfiguration();
    // Ensure we have sensible defaults for how we build blocks.
    if (conf.get("mapreduce.job.max.split.locations") == null) {
        conf.setLong("mapreduce.job.max.split.locations", MAX_SPLIT_LOCATIONS);
    }
    if (conf.get("mapred.max.split.size") == null) {
        // Try to set the split size to the default block size. In case of
        // failure, we'll use this 128MB default.
        long blockSize = 128 * 1024 * 1024; // 128MB
        try {
            blockSize = FileSystem.get(conf).getDefaultBlockSize();
        } catch (IOException e) {
            LOG.error("Error getting filesystem to get get default block size (this does not bode well).");
        }
        conf.setLong("mapred.max.split.size", blockSize);
    }
    for (String key : new String[] { "mapreduce.job.max.split.locations", "mapred.max.split.size" }) {
        LOG.info("{} = {}", key, context.getConfiguration().get(key));
    }

    return super.getSplits(context);
}
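
A note on the property names above: "mapred.max.split.size" is the pre-Hadoop-2 key; on Hadoop 2 and later the same limit is exposed as "mapreduce.input.fileinputformat.split.maxsize" and is settable through FileInputFormat. A driver-side sketch with illustrative values:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

public class SplitSizeDriver {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "split-size-example");
        // Same effect as conf.setLong("mapreduce.input.fileinputformat.split.maxsize", ...).
        FileInputFormat.setMaxInputSplitSize(job, 128L * 1024 * 1024); // 128 MB
    }
}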

From source file:com.rockstor.compact.mapreduce.CompactDirInputFormat.java

License:Apache License

@Override
public List<InputSplit> getSplits(JobContext context) throws IOException, InterruptedException {
    List<InputSplit> inputSplits = new ArrayList<InputSplit>();
    Compactor.getInstance();
    Configuration conf = context.getConfiguration();
    Path rootPath = new Path(PathUtil.getInstance().getTaskRootDir());
    FileSystem dfs = FileSystem.get(conf);

    if (!dfs.exists(rootPath)) {
        return inputSplits;
    }

    FileStatus[] fs = dfs.listStatus(rootPath);
    if (fs == null || fs.length == 0) {
        return inputSplits;
    }

    InputSplit inputSplit = null;
    String taskIdName = null;
    for (FileStatus f : fs) {
        if (!f.isDir()) {
            continue;
        }
        taskIdName = f.getPath().getName();
        LOG.info("add task id name: " + taskIdName);
        inputSplit = new CompactDirInputSplit(taskIdName);
        inputSplits.add(inputSplit);
    }

    return inputSplits;
}

From source file:com.run.mapred.hbase2tsv.HFileInputFormat_mr1.java

License:Apache License

@Override
protected List<FileStatus> listStatus(JobContext job) throws IOException {
    List<FileStatus> result = new ArrayList<FileStatus>();

    // Explode out directories that match the original FileInputFormat
    // filters since HFiles are written to directories where the
    // directory name is the column name
    for (FileStatus status : super.listStatus(job)) {
        if (status.isDirectory()) {
            FileSystem fs = status.getPath().getFileSystem(job.getConfiguration());
            for (FileStatus match : fs.listStatus(status.getPath(), HIDDEN_FILE_FILTER)) {
                result.add(match);
            }
        } else {
            result.add(status);
        }
    }

    return result;
}
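
HIDDEN_FILE_FILTER is a field of the class whose definition is not part of this excerpt; a typical implementation skips files whose names begin with "." or "_", so the following body is an assumption:

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;

static final PathFilter HIDDEN_FILE_FILTER = new PathFilter() {
    @Override
    public boolean accept(Path p) {
        String name = p.getName();
        return !name.startsWith(".") && !name.startsWith("_");
    }
};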

From source file:com.rw.legion.input.JsonInputFormat.java

License:Apache License

@Override
protected boolean isSplitable(JobContext context, Path file) {
    CompressionCodec codec;
    Configuration job = context.getConfiguration();
    legionObjective = ObjectiveDeserializer.deserialize(job.get("legion_objective"));

    if (legionObjective.getCodecOverride() != null) {
        codec = new CompressionCodecFactory(context.getConfiguration())
                .getCodecByClassName(legionObjective.getCodecOverride());
    } else {
        codec = new CompressionCodecFactory(context.getConfiguration()).getCodec(file);
    }

    if (null == codec) {
        return true;
    }

    return codec instanceof SplittableCompressionCodec;
}
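
The deserialization above implies that the job driver stored a serialized objective under the "legion_objective" key before submission. A hypothetical driver-side counterpart (the JSON payload is invented):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

public class LegionDriver {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Illustrative payload; the real objective format is defined by the Legion project.
        conf.set("legion_objective", "{\"codecOverride\":null}");
        Job job = Job.getInstance(conf, "legion-json-example");
        // ... set JsonInputFormat, paths, and submit.
    }
}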

From source file:com.scaleoutsoftware.soss.hserver.DatasetInputFormat.java

License:Apache License

/**
 * Gets the logical splits of the input dataset. Splits are either calculated by the underlying
 * input format and wrapped with {@link InputSplit}, or retrieved from the image stored in the
 * StateServer.
 */
@Override
public List<InputSplit> getSplits(JobContext context) throws IOException, InterruptedException {

    InputFormat<K, V> underlyingInputFormat = getUnderlyingInputFormat(context.getConfiguration());

    try {
        GridImage image = getImage(underlyingInputFormat.getClass());
        image = image.readOrCreateImage(context, underlyingInputFormat);
        return image.getSplits();
    } catch (StateServerException e) {
        LOG.error("Cannot access ScaleOut StateServer. Falling back to original split.", e);
        return underlyingInputFormat.getSplits(context);
    } catch (ClassNotFoundException e) {
        LOG.error("Image class was not found. Falling back to original split.", e);
        return underlyingInputFormat.getSplits(context);
    }
}

From source file:com.scaleoutsoftware.soss.hserver.FileImage.java

License:Apache License

/**
 * If the file list and input format match but some of the files were appended, this method
 * calculates the additional splits to be recorded. The already recorded part of the dataset is
 * served from the StateServer store.
 */
@Override
public boolean checkCurrent(JobContext context, InputFormat format)
        throws StateServerException, IOException, InterruptedException {
    if (!(format instanceof FileInputFormat))
        throw new IllegalArgumentException("Unexpected InputFormat type " + format.getClass().getName());
    List<FileStatus> newFiles = getFiles(context, (FileInputFormat) format);
    if (newFiles.size() != filesPaths.size())
        return false;
    Collections.sort(newFiles);
    for (FileStatus newFile : newFiles) {
        if (newFile.getModificationTime() != modificationDate.get(newFile.getPath().toString())) {
            if (context.getConfiguration().getBoolean(DatasetInputFormat.enableAppendsPropertyName, false)) {
                addNewSplits(context, format);
                BucketStore bucketStore = BucketStoreFactory.getBucketStore(imageIdString);
                bucketStore.writeImage(this, false);
            } else {
                return false;
            }
        }
    }
    return true;
}
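
The append path above is gated on DatasetInputFormat.enableAppendsPropertyName. Assuming that constant is publicly visible (its string value is not shown in the excerpt), a driver could enable append handling like this:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

import com.scaleoutsoftware.soss.hserver.DatasetInputFormat;

public class AppendsDriver {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Turn on detection of appended files, as read in checkCurrent() above.
        conf.setBoolean(DatasetInputFormat.enableAppendsPropertyName, true);
        Job job = Job.getInstance(conf, "appends-enabled");
        // ... configure DatasetInputFormat as the input format and submit.
    }
}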