Example usage for org.apache.hadoop.mapreduce JobContext getConfiguration

Introduction

On this page you can find example usages of org.apache.hadoop.mapreduce JobContext getConfiguration, collected from open-source projects.

Prototype

public Configuration getConfiguration();

Document

Return the configuration for the job.
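
Before the project-specific examples below, here is a minimal sketch of the most common pattern: a task-side class reads job-level settings from the Configuration returned by getConfiguration(). Mapper.Context ultimately extends JobContext, so the call is available in setup(); the property name "my.app.threshold" is a hypothetical, application-specific key, not part of the Hadoop API.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class ThresholdMapper extends Mapper<LongWritable, Text, Text, LongWritable> {

    private int minLength;

    @Override
    protected void setup(Context context) {
        // Mapper.Context extends JobContext, so getConfiguration() is available here.
        Configuration conf = context.getConfiguration();
        // "my.app.threshold" is a hypothetical application-specific key; 10 is its default.
        minLength = conf.getInt("my.app.threshold", 10);
    }

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // Emit only lines longer than the configured threshold.
        if (value.getLength() > minLength) {
            context.write(value, new LongWritable(value.getLength()));
        }
    }
}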

Usage

From source file:com.datasalt.pangool.tuplemr.mapred.lib.output.PangoolMultipleOutputs.java

License:Apache License

/**
 * Returns whether the counters for the named outputs are enabled. By default these counters are disabled.
 *
 * @param job
 *          the job
 * @return TRUE if the counters are enabled, FALSE if they are disabled.
 */
public static boolean getCountersEnabled(JobContext job) {
    return job.getConfiguration().getBoolean(COUNTERS_ENABLED, false);
}
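
The flag is just a boolean in the job Configuration, so the driver-side counterpart is a one-line property set. The helper below is a hypothetical sketch rather than part of the quoted class; it assumes COUNTERS_ENABLED is the same key read by getCountersEnabled() above.

/**
 * Hypothetical counterpart sketch: enables or disables the counters for the named outputs.
 * Assumes COUNTERS_ENABLED is the same configuration key read by getCountersEnabled().
 */
public static void setCountersEnabled(Job job, boolean enabled) {
    job.getConfiguration().setBoolean(COUNTERS_ENABLED, enabled);
}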

From source file:com.datasalt.pangool.tuplemr.mapred.lib.output.ProxyOutputFormat.java

License:Apache License

private void createOutputFormatIfNeeded(JobContext context) throws IOException {
    if (outputFormat == null) {
        outputFormat = InstancesDistributor.loadInstance(context.getConfiguration(), OutputFormat.class,
                context.getConfiguration().get(PROXIED_OUTPUT_FORMAT_CONF, null), true);
    }
}

From source file:com.david.mos.out.FileOutputFormat.java

License:Apache License

public void checkOutputSpecs(JobContext job) throws FileAlreadyExistsException, IOException {
    // Ensure that the output directory is set and not already there
    Path outDir = getOutputPath(job);
    if (outDir == null) {
        throw new InvalidJobConfException("Output directory not set.");
    }

    // get delegation token for outDir's file system
    TokenCache.obtainTokensForNamenodes(job.getCredentials(), new Path[] { outDir }, job.getConfiguration());

    if (outDir.getFileSystem(job.getConfiguration()).exists(outDir)) {
        throw new FileAlreadyExistsException("Output directory " + outDir + " already exists");
    }
}
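
For completeness, the output directory that checkOutputSpecs() validates is itself stored in the job Configuration. The snippet below is a simplified, hypothetical sketch: it assumes the conventional "mapreduce.output.fileoutputformat.outputdir" key, and the real setter would typically also qualify the path against its FileSystem.

public static void setOutputPath(Job job, Path outputDir) {
    // Hypothetical sketch: store the output directory under the conventional key.
    job.getConfiguration().set("mapreduce.output.fileoutputformat.outputdir", outputDir.toString());
}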

From source file:com.dinglicom.clouder.mapreduce.input.FileInputFormat.java

License:Apache License

/**
 * Get the minimum split size.
 * @param job the job
 * @return the minimum number of bytes that can be in a split
 */
public static long getMinSplitSize(JobContext job) {
    return job.getConfiguration().getLong(SPLIT_MINSIZE, 1L);
}

From source file:com.dinglicom.clouder.mapreduce.input.FileInputFormat.java

License:Apache License

/**
 * Get the maximum split size.
 * @param context the job to look at.
 * @return the maximum number of bytes a split can include
 */
public static long getMaxSplitSize(JobContext context) {
    return context.getConfiguration().getLong(SPLIT_MAXSIZE, Long.MAX_VALUE);
}
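
Both getters only read long values from the job Configuration, so a driver can steer split sizing by writing the same keys before submission. The snippet below is an illustrative sketch: it assumes SPLIT_MINSIZE and SPLIT_MAXSIZE are the public constants referenced by the getters above, and the 128 MB / 512 MB bounds are arbitrary.

public static Job configureSplitSizes(Configuration conf) throws IOException {
    Job job = Job.getInstance(conf, "split-size-example");
    // Illustrative bounds: splits of at least 128 MB and at most 512 MB.
    job.getConfiguration().setLong(FileInputFormat.SPLIT_MINSIZE, 128L * 1024 * 1024);
    job.getConfiguration().setLong(FileInputFormat.SPLIT_MAXSIZE, 512L * 1024 * 1024);
    return job;
}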

From source file:com.dinglicom.clouder.mapreduce.input.FileInputFormat.java

License:Apache License

/**
 * Get a PathFilter instance of the filter set for the input paths.
 *
 * @return the PathFilter instance set for the job, NULL if none has been set.
 */
public static PathFilter getInputPathFilter(JobContext context) {
    Configuration conf = context.getConfiguration();
    Class<?> filterClass = conf.getClass(PATHFILTER_CLASS, null, PathFilter.class);
    return (filterClass != null) ? (PathFilter) ReflectionUtils.newInstance(filterClass, conf) : null;
}
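
Because the filter class is resolved reflectively from the Configuration, a concrete PathFilter only needs a public no-argument constructor and a registration under the same key. The filter below is an illustrative sketch (the ".tmp" suffix is arbitrary), and the commented registration line assumes PATHFILTER_CLASS is the constant read above.

public static class SkipTempFilesFilter implements PathFilter {
    @Override
    public boolean accept(Path path) {
        // Reject temporary files; everything else, including directories, passes.
        return !path.getName().endsWith(".tmp");
    }
}

// Driver side: register the filter under the key that getInputPathFilter() reads, e.g.
// job.getConfiguration().setClass(PATHFILTER_CLASS, SkipTempFilesFilter.class, PathFilter.class);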

From source file:com.dinglicom.clouder.mapreduce.input.FileInputFormat.java

License:Apache License

/** List input directories.
 * Subclasses may override to, e.g., select only files matching a regular
 * expression.
 * 
 * @param job the job to list input paths for
 * @return array of FileStatus objects
 * @throws IOException if zero items.
 */
protected List<FileStatus> listStatus(JobContext job) throws IOException {
    List<FileStatus> result = new ArrayList<FileStatus>();
    Path[] dirs = getInputPaths(job);
    if (dirs.length == 0) {
        throw new IOException("No input paths specified in job");
    }

    // get tokens for all the required FileSystems..
    TokenCache.obtainTokensForNamenodes(job.getCredentials(), dirs, job.getConfiguration());

    List<IOException> errors = new ArrayList<IOException>();

    // creates a MultiPathFilter with the hiddenFileFilter and the
    // user provided one (if any).
    List<PathFilter> filters = new ArrayList<PathFilter>();
    filters.add(hiddenFileFilter);
    PathFilter jobFilter = getInputPathFilter(job);
    if (jobFilter != null) {
        filters.add(jobFilter);
    }
    PathFilter inputFilter = new MultiPathFilter(filters);

    for (int i = 0; i < dirs.length; ++i) {
        Path p = dirs[i];
        FileSystem fs = p.getFileSystem(job.getConfiguration());
        FileStatus[] matches = fs.globStatus(p, inputFilter);
        if (matches == null) {
            errors.add(new IOException("Input path does not exist: " + p));
        } else if (matches.length == 0) {
            errors.add(new IOException("Input Pattern " + p + " matches 0 files"));
        } else {
            for (FileStatus globStat : matches) {
                if (globStat.isDirectory()) {
                    for (FileStatus stat : fs.listStatus(globStat.getPath(), inputFilter)) {
                        result.add(stat);
                    }
                } else {
                    result.add(globStat);
                }
            }
        }
    }

    if (!errors.isEmpty()) {
        throw new InvalidInputException(errors);
    }
    LOG.info("Total input paths to process : " + result.size());
    return result;
}

From source file:com.dinglicom.clouder.mapreduce.input.FileInputFormat.java

License:Apache License

/** 
 * Generate the list of files and make them into FileSplits.
 * @param job the job context
 * @throws IOException
 */
public List<InputSplit> getSplits(JobContext job) throws IOException {
    long minSize = Math.max(getFormatMinSplitSize(), getMinSplitSize(job));
    long maxSize = getMaxSplitSize(job);

    // generate splits
    List<InputSplit> splits = new ArrayList<InputSplit>();
    List<FileStatus> files = listStatus(job);
    for (FileStatus file : files) {
        Path path = file.getPath();
        long length = file.getLen();
        if (length != 0) {
            FileSystem fs = path.getFileSystem(job.getConfiguration());
            BlockLocation[] blkLocations = fs.getFileBlockLocations(file, 0, length);
            if (isSplitable(job, path)) {
                long blockSize = file.getBlockSize();
                long splitSize = computeSplitSize(blockSize, minSize, maxSize);

                long bytesRemaining = length;
                while (((double) bytesRemaining) / splitSize > SPLIT_SLOP) {
                    int blkIndex = getBlockIndex(blkLocations, length - bytesRemaining);
                    splits.add(makeSplit(path, length - bytesRemaining, splitSize,
                            blkLocations[blkIndex].getHosts()));
                    bytesRemaining -= splitSize;
                }

                if (bytesRemaining != 0) {
                    int blkIndex = getBlockIndex(blkLocations, length - bytesRemaining);
                    splits.add(makeSplit(path, length - bytesRemaining, bytesRemaining,
                            blkLocations[blkIndex].getHosts()));
                }
            } else { // not splitable
                splits.add(makeSplit(path, 0, length, blkLocations[0].getHosts()));
            }
        } else {
            //Create empty hosts array for zero length files
            splits.add(makeSplit(path, 0, length, new String[0]));
        }
    }
    // Save the number of input files for metrics/loadgen
    job.getConfiguration().setLong(NUM_INPUT_FILES, files.size());
    LOG.debug("Total # of splits: " + splits.size());
    return splits;
}

From source file:com.dinglicom.clouder.mapreduce.input.FileInputFormat.java

License:Apache License

/**
 * Get the list of input {@link Path}s for the map-reduce job.
 *
 * @param context The job
 * @return the list of input {@link Path}s for the map-reduce job.
 */
public static Path[] getInputPaths(JobContext context) {
    String dirs = context.getConfiguration().get(INPUT_DIR, "");
    String[] list = StringUtils.split(dirs);
    Path[] result = new Path[list.length];
    for (int i = 0; i < list.length; i++) {
        result[i] = new Path(StringUtils.unEscapeString(list[i]));
    }
    return result;
}
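
The inverse operation on the driver side appends a comma-separated, escaped path to the same property. The helper below is a hypothetical sketch modeled on the usual add-input-path pattern; it assumes INPUT_DIR is the constant read by getInputPaths() above and that StringUtils is org.apache.hadoop.util.StringUtils.

public static void addInputPath(Job job, Path path) throws IOException {
    Configuration conf = job.getConfiguration();
    // Qualify the path against its FileSystem, then escape commas before appending.
    path = path.getFileSystem(conf).makeQualified(path);
    String dirStr = StringUtils.escapeString(path.toString());
    String dirs = conf.get(INPUT_DIR);
    conf.set(INPUT_DIR, dirs == null ? dirStr : dirs + "," + dirStr);
}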

From source file:com.edwardsit.spark4n6.EWFImageInputFormat.java

License:Apache License

@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
    log.setLevel(Level.DEBUG);
    List<InputSplit> splits = new ArrayList<InputSplit>();
    List<FileStatus> files = listStatus(job);
    BlockLocation[] blkLocations = null;
    Path path = null;
    FileSystem fs = null;
    EWFFileReader ewf = null;
    ArrayList<EWFSection.SectionPrefix> sections = null;
    Iterator<EWFSection.SectionPrefix> it = null;
    EWFSection.SectionPrefix sp = null;
    Path priorFile = null;
    long priorOffset = 0L;
    FileStatus priorFileStatus = null;
    chunkSize = new EWFSegmentFileReader(fs).DEFAULT_CHUNK_SIZE;
    long priorStart = 0L;
    int blkIndex = 0;
    for (FileStatus file : files) {
        path = file.getPath();
        fs = path.getFileSystem(job.getConfiguration());
        if (path.getName().endsWith(".E01")) {

            ewf = new EWFFileReader(fs, path);
            sections = ewf.getSectionPrefixArray();
            it = sections.iterator();
            while (it.hasNext()) {
                sp = it.next();
                if (sp.sectionType.equals(EWFSection.SectionType.TABLE_TYPE)) {
                    priorFileStatus = fs.getFileStatus(priorFile);
                    for (long i = sp.chunkCount; i > 0L; i = i - getChunksPerSplit(priorFileStatus)) {
                        if (priorFileStatus instanceof LocatedFileStatus) {
                            blkLocations = ((LocatedFileStatus) priorFileStatus).getBlockLocations();
                        } else {
                            blkLocations = fs.getFileBlockLocations(priorFileStatus, priorOffset,
                                    (getChunksPerSplit(priorFileStatus) * chunkSize));
                        }
                        blkIndex = getBlockIndex(blkLocations, priorOffset);
                        if (i > getChunksPerSplit(priorFileStatus)) {
                            log.debug("splits.add(makeSplit(" + priorFile + ", " + (priorStart * chunkSize)
                                    + ", " + (getChunksPerSplit(priorFileStatus) * chunkSize) + ", "
                                    + listHosts(blkLocations, blkIndex) + ");");
                            splits.add(makeSplit(priorFile, (priorStart * chunkSize),
                                    (getChunksPerSplit(priorFileStatus) * chunkSize),
                                    blkLocations[blkIndex].getHosts()));
                            priorStart += getChunksPerSplit(priorFileStatus);
                        } else {
                            log.debug("splits.add(makeSplit(" + priorFile + ", " + (priorStart * chunkSize)
                                    + ", " + (i * chunkSize) + ", " + listHosts(blkLocations, blkIndex) + ");");
                            splits.add(makeSplit(priorFile, (priorStart * chunkSize), (i * chunkSize),
                                    blkLocations[blkIndex].getHosts()));
                            priorStart += i;
                        }
                    }
                }
                priorFile = sp.file;
                priorOffset = sp.fileOffset;
            }
        }
    }
    return splits;
}