Example usage for org.apache.hadoop.mapreduce JobContext getConfiguration

List of usage examples for org.apache.hadoop.mapreduce JobContext getConfiguration

Introduction

On this page you can find example usage for org.apache.hadoop.mapreduce JobContext getConfiguration.

Prototype

public Configuration getConfiguration();

Document

Return the configuration for the job.
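
As a quick orientation before the project examples below, here is a minimal, hypothetical sketch of the typical pattern: an InputFormat subclass calls getConfiguration() inside getSplits() to read job settings. The class name and the "example.min.records" property are made up purely for illustration.

import java.io.IOException;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;

// Hypothetical subclass: reads a custom property from the job configuration
// before delegating split calculation to TextInputFormat.
public class ConfiguredTextInputFormat extends TextInputFormat {
    @Override
    public List<InputSplit> getSplits(JobContext context) throws IOException {
        Configuration conf = context.getConfiguration();
        // "example.min.records" is an illustrative key, not a real Hadoop property.
        int minRecords = conf.getInt("example.min.records", 1);
        System.out.println("minimum records per split: " + minRecords);
        return super.getSplits(context);
    }
}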

Usage

From source file:com.mongodb.hadoop.BSONFileInputFormat.java

License:Apache License

@Override
public List<FileSplit> getSplits(final JobContext context) throws IOException {
    Configuration config = context.getConfiguration();
    PathFilter pf = getInputPathFilter(context);
    ArrayList<FileSplit> splits = new ArrayList<FileSplit>();
    List<FileStatus> inputFiles = listStatus(context);
    for (FileStatus file : inputFiles) {
        if (pf != null && !pf.accept(file.getPath())) {
            if (LOG.isDebugEnabled()) {
                LOG.debug(String.format("skipping file %s not matched path filter.", file.getPath()));
            }
            continue;
        } else {
            if (LOG.isDebugEnabled()) {
                LOG.debug("processing file " + file.getPath());
            }
        }

        BSONSplitter splitter = new BSONSplitter();
        splitter.setConf(config);
        splitter.setInputPath(file.getPath());
        Path splitFilePath = new Path(file.getPath().getParent(), "." + file.getPath().getName() + ".splits");
        try {
            splitter.loadSplitsFromSplitFile(file, splitFilePath);
        } catch (BSONSplitter.NoSplitFileException nsfe) {
            if (LOG.isDebugEnabled()) {
                LOG.debug(String.format("No split file for %s; building split file", file.getPath()));
            }
            splitter.readSplitsForFile(file);
        }
        if (LOG.isDebugEnabled()) {
            LOG.debug(String.format("BSONSplitter found %d splits.", splitter.getAllSplits().size()));
        }
        splits.addAll(splitter.getAllSplits());
    }
    if (LOG.isDebugEnabled()) {
        LOG.debug(String.format("Total of %d found.", splits.size()));
    }
    return splits;
}

From source file:com.mongodb.hadoop.input.DelegatingInputFormat.java

License:Apache License

@SuppressWarnings("unchecked")
public List<InputSplit> getSplits(JobContext job) throws IOException, InterruptedException {
    Configuration conf = job.getConfiguration();
    Job jobCopy = new Job(conf);
    List<InputSplit> splits = new ArrayList<InputSplit>();
    Map<Path, InputFormat> formatMap = MongoMultipleInputs.getInputFormatMap(job);
    Map<Path, Class<? extends Mapper>> mapperMap = MongoMultipleInputs.getMapperTypeMap(job);
    //     Map<Class<? extends InputFormat>, List<Path>> formatPaths   = new HashMap<Class<? extends InputFormat>, List<Path>>();

    for (Entry<Path, InputFormat> entry : formatMap.entrySet()) {
        InputFormat formatClass = (InputFormat) ReflectionUtils.newInstance(entry.getValue().getClass(), conf);
        Class<? extends Mapper> mapperClass;
        mapperClass = mapperMap.get(entry.getKey());
        try {
            List<InputSplit> pathSplits = ((MongoInputFormat) formatClass).getSplits(jobCopy, entry.getKey());
            for (InputSplit pathSplit : pathSplits) {
                splits.add(TaggedInputSplitGenerator.getTaggedInputSplit(pathSplit, conf,
                        formatClass.getClass(), mapperClass));
            }
        } catch (ClassCastException e) {
            List<InputSplit> pathSplits = formatClass.getSplits(jobCopy);
            for (InputSplit pathSplit : pathSplits) {
                splits.add(TaggedInputSplitGenerator.getTaggedInputSplit(pathSplit, conf,
                        formatClass.getClass(), mapperClass));
            }
        }
    }
    return splits;
}

From source file:com.mongodb.hadoop.input.MongoMultipleInputs.java

License:Apache License

/**
 * Retrieves a map of {@link Path}s to the {@link InputFormat} class
 * that should be used for them.
 * 
 * @param job The {@link JobContext}
 * @see #addInputPath(JobConf, Path, Class)
 * @return A map of paths to inputformats for the job
 */
@SuppressWarnings("unchecked")
static Map<Path, InputFormat> getInputFormatMap(JobContext job) {
    Map<Path, InputFormat> m = new HashMap<Path, InputFormat>();
    Configuration conf = job.getConfiguration();

    List<MongoRequest> mongoRequests = MongoConfigUtil.getMongoRequests(conf);
    for (MongoRequest mongoRequest : mongoRequests) {
        InputFormat inputFormat;
        try {
            inputFormat = (InputFormat) ReflectionUtils
                    .newInstance(conf.getClassByName(mongoRequest.getInputFormat()), conf);
        } catch (ClassNotFoundException e) {
            throw new RuntimeException(e);
        }
        m.put(new Path(mongoRequest.getInputURI().toString()), inputFormat);
    }
    return m;
}

From source file:com.mongodb.hadoop.input.MongoMultipleInputs.java

License:Apache License

/**
 * Retrieves a map of {@link Path}s to the {@link Mapper} class that
 * should be used for them.
 * 
 * @param job The {@link JobContext}
 * @see #addInputPath(JobConf, Path, Class, Class)
 * @return A map of paths to mappers for the job
 */
@SuppressWarnings("unchecked")
static Map<Path, Class<? extends Mapper>> getMapperTypeMap(JobContext job) {
    Configuration conf = job.getConfiguration();
    List<MongoRequest> mongoRequests = MongoConfigUtil.getMongoRequests(conf);
    if (mongoRequests == null) {
        return Collections.emptyMap();
    }
    Map<Path, Class<? extends Mapper>> m = new HashMap<Path, Class<? extends Mapper>>();
    for (MongoRequest mongoRequest : mongoRequests) {
        Class<? extends Mapper> mapClass;
        try {
            mapClass = (Class<? extends Mapper>) conf.getClassByName(mongoRequest.getMapper());
        } catch (ClassNotFoundException e) {
            throw new RuntimeException(e);
        }
        m.put(new Path(mongoRequest.getInputURI().toString()), mapClass);
    }
    return m;
}

From source file:com.mongodb.hadoop.MongoInputFormat.java

License:Apache License

@Override
public List<InputSplit> getSplits(final JobContext context) throws IOException {
    final Configuration conf = context.getConfiguration();
    try {
        MongoSplitter splitterImpl = MongoSplitterFactory.getSplitter(conf);
        LOG.debug("Using " + splitterImpl.toString() + " to calculate splits.");
        return splitterImpl.calculateSplits();
    } catch (SplitFailedException spfe) {
        throw new IOException(spfe);
    }
}

From source file:com.mongodb.hadoop.MongoOutputFormat.java

License:Apache License

public void checkOutputSpecs(final JobContext context) throws IOException {
    if (MongoConfigUtil.getOutputURIs(context.getConfiguration()).isEmpty()) {
        throw new IOException("No output URI is specified. You must set mongo.output.uri.");
    }
}
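
The check above only passes when an output URI is present in the job configuration. A rough, hypothetical driver-side sketch follows; the class name and the MongoDB URI are placeholders, but mongo.output.uri is the key named in the exception message above.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

import com.mongodb.hadoop.MongoOutputFormat;

// Hypothetical driver: sets the output URI that checkOutputSpecs() looks for.
public class MongoOutputDriver {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // "mongo.output.uri" is the key checked above; the URI value is a placeholder.
        conf.set("mongo.output.uri", "mongodb://localhost:27017/demo.output");
        Job job = Job.getInstance(conf, "mongo-output-example");
        job.setOutputFormatClass(MongoOutputFormat.class);
        // Mapper, reducer, and input configuration would be set here;
        // checkOutputSpecs(job) will now find a non-empty output URI list.
    }
}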

From source file:com.moz.fiji.mapreduce.framework.HBaseFijiTableInputFormat.java

License:Apache License

/** {@inheritDoc} */
@Override
public List<InputSplit> getSplits(JobContext context) throws IOException {
    final Configuration conf = context.getConfiguration();
    final FijiURI inputTableURI = FijiURI.newBuilder(conf.get(FijiConfKeys.FIJI_INPUT_TABLE_URI)).build();
    final Fiji fiji = Fiji.Factory.open(inputTableURI, conf);
    try {
        final FijiTable table = fiji.openTable(inputTableURI.getTable());
        try {
            final byte[] htableName = getHBaseTableName(table);
            final List<InputSplit> splits = Lists.newArrayList();
            byte[] scanStartKey = HConstants.EMPTY_START_ROW;
            if (null != conf.get(FijiConfKeys.FIJI_START_ROW_KEY)) {
                scanStartKey = Base64.decodeBase64(conf.get(FijiConfKeys.FIJI_START_ROW_KEY));
            }
            byte[] scanLimitKey = HConstants.EMPTY_END_ROW;
            if (null != conf.get(FijiConfKeys.FIJI_LIMIT_ROW_KEY)) {
                scanLimitKey = Base64.decodeBase64(conf.get(FijiConfKeys.FIJI_LIMIT_ROW_KEY));
            }

            for (FijiRegion region : table.getRegions()) {
                final byte[] regionStartKey = region.getStartKey();
                final byte[] regionEndKey = region.getEndKey();
                // Determine if the scan start and limit key fall into the region.
                // Logic was copied from o.a.h.h.m.TableInputFormatBase
                if ((scanStartKey.length == 0 || regionEndKey.length == 0
                        || Bytes.compareTo(scanStartKey, regionEndKey) < 0)
                        && (scanLimitKey.length == 0 || Bytes.compareTo(scanLimitKey, regionStartKey) > 0)) {
                    byte[] splitStartKey = (scanStartKey.length == 0
                            || Bytes.compareTo(regionStartKey, scanStartKey) >= 0) ? regionStartKey
                                    : scanStartKey;
                    byte[] splitEndKey = ((scanLimitKey.length == 0
                            || Bytes.compareTo(regionEndKey, scanLimitKey) <= 0) && regionEndKey.length > 0)
                                    ? regionEndKey
                                    : scanLimitKey;

                    // TODO(FIJIMR-65): For now pick the first available location (ie. region server),
                    // if any.
                    final String location = region.getLocations().isEmpty() ? null
                            : region.getLocations().iterator().next();
                    final TableSplit tableSplit = new TableSplit(htableName, splitStartKey, splitEndKey,
                            location);
                    splits.add(new FijiTableSplit(tableSplit));
                }
            }
            return splits;

        } finally {
            ResourceUtils.releaseOrLog(table);
        }
    } finally {
        ResourceUtils.releaseOrLog(fiji);
    }
}

From source file:com.moz.fiji.schema.mapreduce.FijiTableInputFormat.java

License:Apache License

/** {@inheritDoc} */
@Override
public List<InputSplit> getSplits(JobContext context) throws IOException {
    final Configuration conf = context.getConfiguration();
    final FijiURI inputTableURI = getInputTableURI(conf);
    final Fiji fiji = Fiji.Factory.open(inputTableURI, conf);
    final FijiTable table = fiji.openTable(inputTableURI.getTable());

    final HTableInterface htable = HBaseFijiTable.downcast(table).openHTableConnection();
    try {
        final List<InputSplit> splits = Lists.newArrayList();
        for (FijiRegion region : table.getRegions()) {
            final byte[] startKey = region.getStartKey();
            // TODO: a smart way to get which location is most relevant.
            final String location = region.getLocations().isEmpty() ? null
                    : region.getLocations().iterator().next();
            final TableSplit tableSplit = new TableSplit(htable.getTableName(), startKey, region.getEndKey(),
                    location);
            splits.add(new FijiTableSplit(tableSplit, startKey));
        }
        return splits;

    } finally {
        htable.close();
    }
}

From source file:com.netflix.aegisthus.input.AegisthusInputFormat.java

License:Apache License

@SuppressWarnings("rawtypes")
private Map<String, AbstractType> initConvertors(JobContext job) throws IOException {
    Map<String, AbstractType> convertors = Maps.newHashMap();
    String conversion = job.getConfiguration().get(KEY_TYPE);
    LOG.info(KEY_TYPE + ": " + conversion);
    if (conversion != null) {
        try {
            convertors.put(SSTableScanner.KEY, TypeParser.parse(conversion));
        } catch (ConfigurationException e) {
            throw new IOException(e);
        } catch (SyntaxException e) {
            throw new IOException(e);
        }
    }
    conversion = job.getConfiguration().get(COLUMN_TYPE);
    LOG.info(COLUMN_TYPE + ": " + conversion);
    if (conversion != null) {
        try {
            convertors.put(SSTableScanner.COLUMN_NAME_KEY, TypeParser.parse(conversion));
        } catch (ConfigurationException e) {
            throw new IOException(e);
        } catch (SyntaxException e) {
            throw new IOException(e);
        }
    }

    if (convertors.size() == 0) {
        return null;
    }
    return convertors;
}

From source file:com.netflix.aegisthus.input.AegisthusInputFormat.java

License:Apache License

/**
 * The main thing that addSSTableSplit handles is splitting SSTables
 * using their index if available. The general approach is that if the file
 * is larger than the block size plus some fuzzy factor, it is split at
 * offsets read from the corresponding index file.
 */
public void addSSTableSplit(List<InputSplit> splits, JobContext job, FileStatus file) throws IOException {
    Path path = file.getPath();
    FileSystem fs = path.getFileSystem(job.getConfiguration());
    long length = file.getLen();
    BlockLocation[] blkLocations = fs.getFileBlockLocations(file, 0, length);
    if (length != 0) {
        long blockSize = file.getBlockSize();
        long maxSplitSize = (long) (blockSize * .99);
        long fuzzySplit = (long) (blockSize * 1.2);

        long bytesRemaining = length;

        Iterator<Long> scanner = null;
        Path compressionPath = new Path(path.getParent(),
                path.getName().replaceAll("-Data.db", "-CompressionInfo.db"));
        if (!fs.exists(compressionPath)) {
            // Only initialize if we are going to have more than a single
            // split
            if (fuzzySplit < length) {
                Path indexPath = new Path(path.getParent(), path.getName().replaceAll("-Data.db", "-Index.db"));
                if (!fs.exists(indexPath)) {
                    fuzzySplit = length;
                } else {
                    FSDataInputStream fileIn = fs.open(indexPath);
                    scanner = new OffsetScanner(new DataInputStream(new BufferedInputStream(fileIn)),
                            indexPath.getName());
                }
            }
            long splitStart = 0;
            while (splitStart + fuzzySplit < length && scanner.hasNext()) {
                long splitSize = 0;
                // The scanner returns an offset from the start of the file.
                while (splitSize < maxSplitSize && scanner.hasNext()) {
                    splitSize = scanner.next() - splitStart;
                }
                int blkIndex = getBlockIndex(blkLocations, splitStart + (splitSize / 2));
                LOG.info("split path: " + path.getName() + ":" + splitStart + ":" + splitSize);
                splits.add(new AegSplit(path, splitStart, splitSize, blkLocations[blkIndex].getHosts(),
                        convertors));
                bytesRemaining -= splitSize;
                splitStart += splitSize;
            }
        }

        if (bytesRemaining != 0) {
            LOG.info("end path: " + path.getName() + ":" + (length - bytesRemaining) + ":" + bytesRemaining);
            splits.add(new AegSplit(path, length - bytesRemaining, bytesRemaining,
                    blkLocations[blkLocations.length - 1].getHosts(), convertors, fs.exists(compressionPath),
                    compressionPath));
        }
    } else {
        LOG.info("skipping zero length file: " + path.toString());
    }
}