Usage examples for org.apache.hadoop.mapreduce.JobContext#getConfiguration()
public Configuration getConfiguration();
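JobContext is the read-only view of a job shared by drivers and tasks, and getConfiguration() returns the job's Configuration. Before the per-project examples below, a minimal sketch of the most common use: reading a job setting inside a Mapper, whose Context is itself a JobContext. The property key "myapp.case.sensitive" is hypothetical.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class TokenizingMapper extends Mapper<LongWritable, Text, Text, IntWritable> {

    private boolean caseSensitive;

    @Override
    protected void setup(Context context) {
        // Mapper.Context extends JobContext, so the job Configuration is at hand
        Configuration conf = context.getConfiguration();
        caseSensitive = conf.getBoolean("myapp.case.sensitive", false);
    }
}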
From source file:com.datasalt.pangool.tuplemr.mapred.lib.output.PangoolMultipleOutputs.java
License:Apache License
/**
 * Returns whether the counters for the named outputs are enabled. By default these
 * counters are disabled.
 *
 * @param job the job
 * @return TRUE if the counters are enabled, FALSE if they are disabled.
 */
public static boolean getCountersEnabled(JobContext job) {
    return job.getConfiguration().getBoolean(COUNTERS_ENABLED, false);
}
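Driver-side, this flag is usually flipped before submission. A sketch assuming PangoolMultipleOutputs mirrors Hadoop's MultipleOutputs and provides a matching setCountersEnabled(Job, boolean); verify against the Pangool API:

Job job = Job.getInstance(new Configuration(), "pangool-example");
PangoolMultipleOutputs.setCountersEnabled(job, true);  // assumed setter, symmetric to the getter above
boolean countersOn = PangoolMultipleOutputs.getCountersEnabled(job);  // now true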
From source file:com.datasalt.pangool.tuplemr.mapred.lib.output.ProxyOutputFormat.java
License:Apache License
private void createOutputFormatIfNeeded(JobContext context) throws IOException {
    if (outputFormat == null) {
        outputFormat = InstancesDistributor.loadInstance(context.getConfiguration(), OutputFormat.class,
                context.getConfiguration().get(PROXIED_OUTPUT_FORMAT_CONF, null), true);
    }
}
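InstancesDistributor here deserializes a previously distributed OutputFormat instance. For comparison, a plain-Hadoop variant of the same lazy-delegate pattern would store only a class name in the configuration (the key below is hypothetical):

private OutputFormat<?, ?> delegate;

private void createDelegateIfNeeded(JobContext context) {
    if (delegate == null) {
        Configuration conf = context.getConfiguration();
        // Fall back to TextOutputFormat when no delegate class was configured
        Class<? extends OutputFormat> clazz = conf.getClass("proxied.output.format.class",
                TextOutputFormat.class, OutputFormat.class);
        delegate = ReflectionUtils.newInstance(clazz, conf);
    }
}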
From source file:com.david.mos.out.FileOutputFormat.java
License:Apache License
public void checkOutputSpecs(JobContext job) throws FileAlreadyExistsException, IOException {
    // Ensure that the output directory is set and not already there
    Path outDir = getOutputPath(job);
    if (outDir == null) {
        throw new InvalidJobConfException("Output directory not set.");
    }

    // Get delegation tokens for outDir's file system
    TokenCache.obtainTokensForNamenodes(job.getCredentials(), new Path[] { outDir }, job.getConfiguration());

    if (outDir.getFileSystem(job.getConfiguration()).exists(outDir)) {
        throw new FileAlreadyExistsException("Output directory " + outDir + " already exists");
    }
}
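The check passes only when an output path is set and does not yet exist, so the driver-side counterpart looks like this (paths illustrative; setOutputPath is the stock Hadoop setter, which this fork presumably keeps):

Job job = Job.getInstance(new Configuration(), "wordcount");
FileOutputFormat.setOutputPath(job, new Path("/user/alice/wordcount-out"));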
From source file:com.dinglicom.clouder.mapreduce.input.FileInputFormat.java
License:Apache License
/**
 * Get the minimum split size.
 *
 * @param job the job
 * @return the minimum number of bytes that can be in a split
 */
public static long getMinSplitSize(JobContext job) {
    return job.getConfiguration().getLong(SPLIT_MINSIZE, 1L);
}
From source file:com.dinglicom.clouder.mapreduce.input.FileInputFormat.java
License:Apache License
/**
 * Get the maximum split size.
 *
 * @param context the job to look at.
 * @return the maximum number of bytes a split can include
 */
public static long getMaxSplitSize(JobContext context) {
    return context.getConfiguration().getLong(SPLIT_MAXSIZE, Long.MAX_VALUE);
}
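Both getters read plain longs from the configuration. In stock Hadoop the backing keys are mapreduce.input.fileinputformat.split.minsize and ...split.maxsize; this fork's SPLIT_MINSIZE/SPLIT_MAXSIZE are assumed to bind the same names. A driver sketch using the standard setters:

Job job = Job.getInstance(new Configuration(), "split-tuning");
FileInputFormat.setMinInputSplitSize(job, 64L * 1024 * 1024);   // 64 MB floor
FileInputFormat.setMaxInputSplitSize(job, 256L * 1024 * 1024);  // 256 MB ceiling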
From source file:com.dinglicom.clouder.mapreduce.input.FileInputFormat.java
License:Apache License
/**
 * Get a PathFilter instance of the filter set for the input paths.
 *
 * @return the PathFilter instance set for the job, NULL if none has been set.
 */
public static PathFilter getInputPathFilter(JobContext context) {
    Configuration conf = context.getConfiguration();
    Class<?> filterClass = conf.getClass(PATHFILTER_CLASS, null, PathFilter.class);
    return (filterClass != null) ? (PathFilter) ReflectionUtils.newInstance(filterClass, conf) : null;
}
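The producing side registers a filter class under PATHFILTER_CLASS, which getInputPathFilter() then instantiates reflectively. A sketch using the stock Hadoop setter (class name and extension are illustrative):

public class NoTmpFilter implements PathFilter {
    @Override
    public boolean accept(Path path) {
        return !path.getName().endsWith(".tmp"); // skip in-flight files
    }
}

// In the driver:
FileInputFormat.setInputPathFilter(job, NoTmpFilter.class);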
From source file:com.dinglicom.clouder.mapreduce.input.FileInputFormat.java
License:Apache License
/**
 * List input directories.
 * Subclasses may override to, e.g., select only files matching a regular expression.
 *
 * @param job the job to list input paths for
 * @return a list of FileStatus objects
 * @throws IOException if no input paths are specified or an input path is invalid
 */
protected List<FileStatus> listStatus(JobContext job) throws IOException {
    List<FileStatus> result = new ArrayList<FileStatus>();
    Path[] dirs = getInputPaths(job);
    if (dirs.length == 0) {
        throw new IOException("No input paths specified in job");
    }

    // Get tokens for all the required FileSystems.
    TokenCache.obtainTokensForNamenodes(job.getCredentials(), dirs, job.getConfiguration());

    List<IOException> errors = new ArrayList<IOException>();

    // Creates a MultiPathFilter with the hiddenFileFilter and the
    // user-provided one (if any).
    List<PathFilter> filters = new ArrayList<PathFilter>();
    filters.add(hiddenFileFilter);
    PathFilter jobFilter = getInputPathFilter(job);
    if (jobFilter != null) {
        filters.add(jobFilter);
    }
    PathFilter inputFilter = new MultiPathFilter(filters);

    for (int i = 0; i < dirs.length; ++i) {
        Path p = dirs[i];
        FileSystem fs = p.getFileSystem(job.getConfiguration());
        FileStatus[] matches = fs.globStatus(p, inputFilter);
        if (matches == null) {
            errors.add(new IOException("Input path does not exist: " + p));
        } else if (matches.length == 0) {
            errors.add(new IOException("Input Pattern " + p + " matches 0 files"));
        } else {
            for (FileStatus globStat : matches) {
                if (globStat.isDirectory()) {
                    for (FileStatus stat : fs.listStatus(globStat.getPath(), inputFilter)) {
                        result.add(stat);
                    }
                } else {
                    result.add(globStat);
                }
            }
        }
    }

    if (!errors.isEmpty()) {
        throw new InvalidInputException(errors);
    }
    LOG.info("Total input paths to process : " + result.size());
    return result;
}
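As the javadoc suggests, subclasses can narrow the listing. A minimal sketch, built on stock TextInputFormat rather than this fork (class name and extension are illustrative):

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;

public class LogFileInputFormat extends TextInputFormat {
    @Override
    protected List<FileStatus> listStatus(JobContext job) throws IOException {
        List<FileStatus> logs = new ArrayList<FileStatus>();
        for (FileStatus stat : super.listStatus(job)) {
            if (stat.getPath().getName().endsWith(".log")) { // keep only .log files
                logs.add(stat);
            }
        }
        return logs;
    }
}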
From source file:com.dinglicom.clouder.mapreduce.input.FileInputFormat.java
License:Apache License
/**
 * Generate the list of files and make them into FileSplits.
 *
 * @param job the job context
 * @throws IOException
 */
public List<InputSplit> getSplits(JobContext job) throws IOException {
    long minSize = Math.max(getFormatMinSplitSize(), getMinSplitSize(job));
    long maxSize = getMaxSplitSize(job);

    // generate splits
    List<InputSplit> splits = new ArrayList<InputSplit>();
    List<FileStatus> files = listStatus(job);
    for (FileStatus file : files) {
        Path path = file.getPath();
        long length = file.getLen();
        if (length != 0) {
            FileSystem fs = path.getFileSystem(job.getConfiguration());
            BlockLocation[] blkLocations = fs.getFileBlockLocations(file, 0, length);
            if (isSplitable(job, path)) {
                long blockSize = file.getBlockSize();
                long splitSize = computeSplitSize(blockSize, minSize, maxSize);

                long bytesRemaining = length;
                while (((double) bytesRemaining) / splitSize > SPLIT_SLOP) {
                    int blkIndex = getBlockIndex(blkLocations, length - bytesRemaining);
                    splits.add(makeSplit(path, length - bytesRemaining, splitSize,
                            blkLocations[blkIndex].getHosts()));
                    bytesRemaining -= splitSize;
                }

                if (bytesRemaining != 0) {
                    int blkIndex = getBlockIndex(blkLocations, length - bytesRemaining);
                    splits.add(makeSplit(path, length - bytesRemaining, bytesRemaining,
                            blkLocations[blkIndex].getHosts()));
                }
            } else { // not splitable
                splits.add(makeSplit(path, 0, length, blkLocations[0].getHosts()));
            }
        } else {
            // Create empty hosts array for zero length files
            splits.add(makeSplit(path, 0, length, new String[0]));
        }
    }
    // Save the number of input files for metrics/loadgen
    job.getConfiguration().setLong(NUM_INPUT_FILES, files.size());
    LOG.debug("Total # of splits: " + splits.size());
    return splits;
}
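computeSplitSize is not shown here; in stock Hadoop it clamps the block size between the configured bounds, and this fork is assumed to do the same:

protected long computeSplitSize(long blockSize, long minSize, long maxSize) {
    return Math.max(minSize, Math.min(maxSize, blockSize));
}

// Worked example: blockSize = 128 MB, minSize = 1, maxSize = 64 MB => 64 MB splits.
// A 200 MB file then yields splits of 64, 64, 64 and 8 MB: SPLIT_SLOP (1.1 in
// stock Hadoop) only lets the final split run up to 10% over splitSize instead
// of leaving a tiny tail, and 72 MB remaining exceeds that allowance.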
From source file:com.dinglicom.clouder.mapreduce.input.FileInputFormat.java
License:Apache License
/**
 * Get the list of input {@link Path}s for the map-reduce job.
 *
 * @param context The job
 * @return the list of input {@link Path}s for the map-reduce job.
 */
public static Path[] getInputPaths(JobContext context) {
    String dirs = context.getConfiguration().get(INPUT_DIR, "");
    String[] list = StringUtils.split(dirs);
    Path[] result = new Path[list.length];
    for (int i = 0; i < list.length; i++) {
        result[i] = new Path(StringUtils.unEscapeString(list[i]));
    }
    return result;
}
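Driver-side counterpart: the stock setters escape each path and comma-join them under INPUT_DIR, which is exactly what the split/unescape loop above reverses (paths illustrative):

Job job = Job.getInstance(new Configuration(), "inputs");
FileInputFormat.addInputPath(job, new Path("/data/2013/01"));
FileInputFormat.addInputPath(job, new Path("/data/2013/02"));
// getInputPaths(job) now returns both paths, in order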
From source file:com.edwardsit.spark4n6.EWFImageInputFormat.java
License:Apache License
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
    log.setLevel(Level.DEBUG);
    List<InputSplit> splits = new ArrayList<InputSplit>();
    List<FileStatus> files = listStatus(job);
    BlockLocation[] blkLocations = null;
    Path path = null;
    FileSystem fs = null;
    EWFFileReader ewf = null;
    ArrayList<EWFSection.SectionPrefix> sections = null;
    Iterator<EWFSection.SectionPrefix> it = null;
    EWFSection.SectionPrefix sp = null;
    Path priorFile = null;
    long priorOffset = 0L;
    FileStatus priorFileStatus = null;
    chunkSize = new EWFSegmentFileReader(fs).DEFAULT_CHUNK_SIZE;
    long priorStart = 0L;
    int blkIndex = 0;
    for (FileStatus file : files) {
        path = file.getPath();
        fs = path.getFileSystem(job.getConfiguration());
        if (path.getName().endsWith(".E01")) {
            ewf = new EWFFileReader(fs, path);
            sections = ewf.getSectionPrefixArray();
            it = sections.iterator();
            while (it.hasNext()) {
                sp = it.next();
                if (sp.sectionType.equals(EWFSection.SectionType.TABLE_TYPE)) {
                    priorFileStatus = fs.getFileStatus(priorFile);
                    for (long i = sp.chunkCount; i > 0L; i = i - getChunksPerSplit(priorFileStatus)) {
                        if (priorFileStatus instanceof LocatedFileStatus) {
                            blkLocations = ((LocatedFileStatus) priorFileStatus).getBlockLocations();
                        } else {
                            blkLocations = fs.getFileBlockLocations(priorFileStatus, priorOffset,
                                    (getChunksPerSplit(priorFileStatus) * chunkSize));
                        }
                        blkIndex = getBlockIndex(blkLocations, priorOffset);
                        if (i > getChunksPerSplit(priorFileStatus)) {
                            log.debug("splits.add(makeSplit(" + priorFile + ", " + (priorStart * chunkSize)
                                    + ", " + (getChunksPerSplit(priorFileStatus) * chunkSize) + ", "
                                    + listHosts(blkLocations, blkIndex) + ");");
                            splits.add(makeSplit(priorFile, (priorStart * chunkSize),
                                    (getChunksPerSplit(priorFileStatus) * chunkSize),
                                    blkLocations[blkIndex].getHosts()));
                            priorStart += getChunksPerSplit(priorFileStatus);
                        } else {
                            log.debug("splits.add(makeSplit(" + priorFile + ", " + (priorStart * chunkSize)
                                    + ", " + (i * chunkSize) + ", " + listHosts(blkLocations, blkIndex) + ");");
                            splits.add(makeSplit(priorFile, (priorStart * chunkSize), (i * chunkSize),
                                    blkLocations[blkIndex].getHosts()));
                            priorStart += i;
                        }
                    }
                }
                priorFile = sp.file;
                priorOffset = sp.fileOffset;
            }
        }
    }
    return splits;
}
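Wiring this format into a job is the usual one-liner; only the class name comes from the source above, the rest is a generic driver sketch (paths illustrative):

Job job = Job.getInstance(new Configuration(), "ewf-image-scan");
job.setInputFormatClass(EWFImageInputFormat.class);
FileInputFormat.addInputPath(job, new Path("/evidence/disk01.E01"));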