List of usage examples for org.apache.hadoop.mapreduce.JobContext.getConfiguration()
public Configuration getConfiguration();
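JobContext.getConfiguration() is how split calculation and output-spec checking read job settings, since those hooks receive a JobContext rather than a live task context. A minimal sketch of the pattern as a TextInputFormat subclass; the property key example.max.splits is hypothetical, used only for illustration:

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;

public class CappedTextInputFormat extends TextInputFormat {
    @Override
    public List<InputSplit> getSplits(JobContext context) throws IOException {
        // Read a job setting through the JobContext.
        Configuration conf = context.getConfiguration();
        // "example.max.splits" is a hypothetical key; substitute your own.
        int maxSplits = conf.getInt("example.max.splits", Integer.MAX_VALUE);
        List<InputSplit> splits = super.getSplits(context);
        // Cap the number of splits according to the configured limit.
        return splits.size() <= maxSplits ? splits
                : new ArrayList<InputSplit>(splits.subList(0, maxSplits));
    }
}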
From source file: com.mongodb.hadoop.BSONFileInputFormat.java
License: Apache License

@Override
public List<FileSplit> getSplits(final JobContext context) throws IOException {
    Configuration config = context.getConfiguration();
    PathFilter pf = getInputPathFilter(context);
    ArrayList<FileSplit> splits = new ArrayList<FileSplit>();
    List<FileStatus> inputFiles = listStatus(context);
    for (FileStatus file : inputFiles) {
        if (pf != null && !pf.accept(file.getPath())) {
            if (LOG.isDebugEnabled()) {
                LOG.debug(String.format("Skipping file %s: did not match the path filter.", file.getPath()));
            }
            continue;
        }
        if (LOG.isDebugEnabled()) {
            LOG.debug("processing file " + file.getPath());
        }
        BSONSplitter splitter = new BSONSplitter();
        splitter.setConf(config);
        splitter.setInputPath(file.getPath());

        Path splitFilePath = new Path(file.getPath().getParent(), "." + file.getPath().getName() + ".splits");
        try {
            splitter.loadSplitsFromSplitFile(file, splitFilePath);
        } catch (BSONSplitter.NoSplitFileException nsfe) {
            if (LOG.isDebugEnabled()) {
                LOG.debug(String.format("No split file for %s; building split file", file.getPath()));
            }
            splitter.readSplitsForFile(file);
        }
        if (LOG.isDebugEnabled()) {
            LOG.debug(String.format("BSONSplitter found %d splits.", splitter.getAllSplits().size()));
        }
        splits.addAll(splitter.getAllSplits());
    }
    if (LOG.isDebugEnabled()) {
        LOG.debug(String.format("Total of %d splits found.", splits.size()));
    }
    return splits;
}
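For context, a minimal driver sketch that wires the format above into a job; the job name and input path are placeholders:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import com.mongodb.hadoop.BSONFileInputFormat;

public class BsonReadDriver {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "bson-read");
        job.setJarByClass(BsonReadDriver.class);
        // BSONFileInputFormat calls getSplits() above, consulting the
        // .splits side file next to each input BSON dump.
        job.setInputFormatClass(BSONFileInputFormat.class);
        FileInputFormat.addInputPath(job, new Path("/data/dump.bson")); // placeholder path
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}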
From source file: com.mongodb.hadoop.input.DelegatingInputFormat.java
License: Apache License

@SuppressWarnings("unchecked")
public List<InputSplit> getSplits(JobContext job) throws IOException, InterruptedException {
    Configuration conf = job.getConfiguration();
    Job jobCopy = new Job(conf);
    List<InputSplit> splits = new ArrayList<InputSplit>();
    Map<Path, InputFormat> formatMap = MongoMultipleInputs.getInputFormatMap(job);
    Map<Path, Class<? extends Mapper>> mapperMap = MongoMultipleInputs.getMapperTypeMap(job);

    for (Entry<Path, InputFormat> entry : formatMap.entrySet()) {
        InputFormat format = (InputFormat) ReflectionUtils.newInstance(entry.getValue().getClass(), conf);
        Class<? extends Mapper> mapperClass = mapperMap.get(entry.getKey());
        try {
            // MongoInputFormat offers a path-aware getSplits(); fall back to the
            // generic single-argument signature for any other InputFormat.
            List<InputSplit> pathSplits = ((MongoInputFormat) format).getSplits(jobCopy, entry.getKey());
            for (InputSplit pathSplit : pathSplits) {
                splits.add(TaggedInputSplitGenerator.getTaggedInputSplit(pathSplit, conf, format.getClass(),
                        mapperClass));
            }
        } catch (ClassCastException e) {
            List<InputSplit> pathSplits = format.getSplits(jobCopy);
            for (InputSplit pathSplit : pathSplits) {
                splits.add(TaggedInputSplitGenerator.getTaggedInputSplit(pathSplit, conf, format.getClass(),
                        mapperClass));
            }
        }
    }
    return splits;
}
From source file: com.mongodb.hadoop.input.MongoMultipleInputs.java
License: Apache License

/**
 * Retrieves a map of {@link Path}s to the {@link InputFormat} class
 * that should be used for them.
 *
 * @param job The {@link JobContext}
 * @see #addInputPath(JobConf, Path, Class)
 * @return A map of paths to input formats for the job
 */
@SuppressWarnings("unchecked")
static Map<Path, InputFormat> getInputFormatMap(JobContext job) {
    Configuration conf = job.getConfiguration();
    List<MongoRequest> mongoRequests = MongoConfigUtil.getMongoRequests(conf);
    if (mongoRequests == null) {
        // Guard against an unset configuration, as getMapperTypeMap() below does.
        return Collections.emptyMap();
    }
    Map<Path, InputFormat> m = new HashMap<Path, InputFormat>();
    for (MongoRequest mongoRequest : mongoRequests) {
        InputFormat inputFormat;
        try {
            inputFormat = (InputFormat) ReflectionUtils.newInstance(
                    conf.getClassByName(mongoRequest.getInputFormat()), conf);
        } catch (ClassNotFoundException e) {
            throw new RuntimeException(e);
        }
        m.put(new Path(mongoRequest.getInputURI().toString()), inputFormat);
    }
    return m;
}
From source file: com.mongodb.hadoop.input.MongoMultipleInputs.java
License: Apache License

/**
 * Retrieves a map of {@link Path}s to the {@link Mapper} class that
 * should be used for them.
 *
 * @param job The {@link JobContext}
 * @see #addInputPath(JobConf, Path, Class, Class)
 * @return A map of paths to mappers for the job
 */
@SuppressWarnings("unchecked")
static Map<Path, Class<? extends Mapper>> getMapperTypeMap(JobContext job) {
    Configuration conf = job.getConfiguration();
    List<MongoRequest> mongoRequests = MongoConfigUtil.getMongoRequests(conf);
    if (mongoRequests == null) {
        return Collections.emptyMap();
    }
    Map<Path, Class<? extends Mapper>> m = new HashMap<Path, Class<? extends Mapper>>();
    for (MongoRequest mongoRequest : mongoRequests) {
        Class<? extends Mapper> mapClass;
        try {
            mapClass = (Class<? extends Mapper>) conf.getClassByName(mongoRequest.getMapper());
        } catch (ClassNotFoundException e) {
            throw new RuntimeException(e);
        }
        m.put(new Path(mongoRequest.getInputURI().toString()), mapClass);
    }
    return m;
}
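The @see tags above name MongoMultipleInputs.addInputPath as the registration side of these lookups. A sketch of what a driver might do, under the assumption that addInputPath takes the job, an input URI path, an InputFormat class, and a Mapper class; the exact signature, and the UserMapper/OrderMapper classes, are assumptions to verify against your mongo-hadoop version:

// Sketch only: signature inferred from the @see tags above; UserMapper and
// OrderMapper are hypothetical mapper classes.
MongoMultipleInputs.addInputPath(job, new Path("mongodb://localhost:27017/db.users"),
        MongoInputFormat.class, UserMapper.class);
MongoMultipleInputs.addInputPath(job, new Path("mongodb://localhost:27017/db.orders"),
        MongoInputFormat.class, OrderMapper.class);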
From source file: com.mongodb.hadoop.MongoInputFormat.java
License: Apache License

@Override
public List<InputSplit> getSplits(final JobContext context) throws IOException {
    final Configuration conf = context.getConfiguration();
    try {
        MongoSplitter splitterImpl = MongoSplitterFactory.getSplitter(conf);
        LOG.debug("Using " + splitterImpl.toString() + " to calculate splits.");
        return splitterImpl.calculateSplits();
    } catch (SplitFailedException spfe) {
        throw new IOException(spfe);
    }
}
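Driver-side, the splitter factory is driven entirely by the Configuration. A minimal sketch that points MongoInputFormat at a collection; mongo.input.uri is the conventional mongo-hadoop input key, and the URI is a placeholder:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;
import com.mongodb.hadoop.MongoInputFormat;

public class MongoReadDriver {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Tells MongoSplitterFactory which collection to split.
        conf.set("mongo.input.uri", "mongodb://localhost:27017/mydb.mycollection");
        Job job = Job.getInstance(conf, "mongo-read");
        job.setInputFormatClass(MongoInputFormat.class);
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}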
From source file: com.mongodb.hadoop.MongoOutputFormat.java
License: Apache License

public void checkOutputSpecs(final JobContext context) throws IOException {
    if (MongoConfigUtil.getOutputURIs(context.getConfiguration()).isEmpty()) {
        throw new IOException("No output URI is specified. You must set mongo.output.uri.");
    }
}
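To satisfy this check, set mongo.output.uri (the key named in the exception message) before submitting. A minimal sketch; the URI is a placeholder:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;
import com.mongodb.hadoop.MongoOutputFormat;

public class MongoWriteConfig {
    public static Job configure() throws Exception {
        Configuration conf = new Configuration();
        // Without this property, checkOutputSpecs() above throws IOException.
        conf.set("mongo.output.uri", "mongodb://localhost:27017/mydb.results");
        Job job = Job.getInstance(conf, "mongo-write");
        job.setOutputFormatClass(MongoOutputFormat.class);
        return job;
    }
}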
From source file: com.moz.fiji.mapreduce.framework.HBaseFijiTableInputFormat.java
License: Apache License

/** {@inheritDoc} */
@Override
public List<InputSplit> getSplits(JobContext context) throws IOException {
    final Configuration conf = context.getConfiguration();
    final FijiURI inputTableURI = FijiURI.newBuilder(conf.get(FijiConfKeys.FIJI_INPUT_TABLE_URI)).build();
    final Fiji fiji = Fiji.Factory.open(inputTableURI, conf);
    try {
        final FijiTable table = fiji.openTable(inputTableURI.getTable());
        try {
            final byte[] htableName = getHBaseTableName(table);
            final List<InputSplit> splits = Lists.newArrayList();

            byte[] scanStartKey = HConstants.EMPTY_START_ROW;
            if (null != conf.get(FijiConfKeys.FIJI_START_ROW_KEY)) {
                scanStartKey = Base64.decodeBase64(conf.get(FijiConfKeys.FIJI_START_ROW_KEY));
            }
            byte[] scanLimitKey = HConstants.EMPTY_END_ROW;
            if (null != conf.get(FijiConfKeys.FIJI_LIMIT_ROW_KEY)) {
                scanLimitKey = Base64.decodeBase64(conf.get(FijiConfKeys.FIJI_LIMIT_ROW_KEY));
            }

            for (FijiRegion region : table.getRegions()) {
                final byte[] regionStartKey = region.getStartKey();
                final byte[] regionEndKey = region.getEndKey();
                // Determine if the scan start and limit key fall into the region.
                // Logic was copied from o.a.h.h.m.TableInputFormatBase
                if ((scanStartKey.length == 0 || regionEndKey.length == 0
                        || Bytes.compareTo(scanStartKey, regionEndKey) < 0)
                        && (scanLimitKey.length == 0 || Bytes.compareTo(scanLimitKey, regionStartKey) > 0)) {
                    byte[] splitStartKey = (scanStartKey.length == 0
                            || Bytes.compareTo(regionStartKey, scanStartKey) >= 0)
                            ? regionStartKey : scanStartKey;
                    byte[] splitEndKey = ((scanLimitKey.length == 0
                            || Bytes.compareTo(regionEndKey, scanLimitKey) <= 0) && regionEndKey.length > 0)
                            ? regionEndKey : scanLimitKey;

                    // TODO(FIJIMR-65): For now pick the first available location (i.e. region server), if any.
                    final String location = region.getLocations().isEmpty()
                            ? null : region.getLocations().iterator().next();
                    final TableSplit tableSplit = new TableSplit(htableName, splitStartKey, splitEndKey, location);
                    splits.add(new FijiTableSplit(tableSplit));
                }
            }
            return splits;
        } finally {
            ResourceUtils.releaseOrLog(table);
        }
    } finally {
        ResourceUtils.releaseOrLog(fiji);
    }
}
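The region-intersection condition in the middle of that loop is the densest part. The same test, pulled out as a standalone helper; a sketch using HBase's Bytes, where a zero-length key means the range is unbounded on that side:

import org.apache.hadoop.hbase.util.Bytes;

public final class ScanRangeUtil {
    // True when the half-open scan range [scanStart, scanLimit) intersects
    // the region range [regionStart, regionEnd). Empty arrays mean "unbounded".
    public static boolean overlaps(byte[] scanStart, byte[] scanLimit,
                                   byte[] regionStart, byte[] regionEnd) {
        boolean startsBeforeRegionEnd = scanStart.length == 0 || regionEnd.length == 0
                || Bytes.compareTo(scanStart, regionEnd) < 0;
        boolean endsAfterRegionStart = scanLimit.length == 0
                || Bytes.compareTo(scanLimit, regionStart) > 0;
        return startsBeforeRegionEnd && endsAfterRegionStart;
    }

    private ScanRangeUtil() { }
}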
From source file: com.moz.fiji.schema.mapreduce.FijiTableInputFormat.java
License: Apache License

/** {@inheritDoc} */
@Override
public List<InputSplit> getSplits(JobContext context) throws IOException {
    final Configuration conf = context.getConfiguration();
    final FijiURI inputTableURI = getInputTableURI(conf);
    final Fiji fiji = Fiji.Factory.open(inputTableURI, conf);
    final FijiTable table = fiji.openTable(inputTableURI.getTable());
    final HTableInterface htable = HBaseFijiTable.downcast(table).openHTableConnection();
    try {
        final List<InputSplit> splits = Lists.newArrayList();
        for (FijiRegion region : table.getRegions()) {
            final byte[] startKey = region.getStartKey();
            // TODO: a smart way to get which location is most relevant.
            final String location = region.getLocations().isEmpty()
                    ? null : region.getLocations().iterator().next();
            final TableSplit tableSplit = new TableSplit(htable.getTableName(), startKey,
                    region.getEndKey(), location);
            splits.add(new FijiTableSplit(tableSplit, startKey));
        }
        return splits;
    } finally {
        htable.close();
    }
}
From source file: com.netflix.aegisthus.input.AegisthusInputFormat.java
License: Apache License

@SuppressWarnings("rawtypes")
private Map<String, AbstractType> initConvertors(JobContext job) throws IOException {
    Map<String, AbstractType> convertors = Maps.newHashMap();
    String conversion = job.getConfiguration().get(KEY_TYPE);
    LOG.info(KEY_TYPE + ": " + conversion);
    if (conversion != null) {
        try {
            convertors.put(SSTableScanner.KEY, TypeParser.parse(conversion));
        } catch (ConfigurationException e) {
            throw new IOException(e);
        } catch (SyntaxException e) {
            throw new IOException(e);
        }
    }
    conversion = job.getConfiguration().get(COLUMN_TYPE);
    LOG.info(COLUMN_TYPE + ": " + conversion);
    if (conversion != null) {
        try {
            convertors.put(SSTableScanner.COLUMN_NAME_KEY, TypeParser.parse(conversion));
        } catch (ConfigurationException e) {
            throw new IOException(e);
        } catch (SyntaxException e) {
            throw new IOException(e);
        }
    }
    if (convertors.size() == 0) {
        return null;
    }
    return convertors;
}
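The values behind those two keys are Cassandra comparator type names that TypeParser.parse can resolve. A sketch of setting them on the job, assuming the KEY_TYPE and COLUMN_TYPE constants are accessible; their literal property names are not shown in this listing:

// Sketch only: assumes KEY_TYPE and COLUMN_TYPE are visible constants on
// AegisthusInputFormat; the type names are standard Cassandra marshal types.
Configuration conf = job.getConfiguration();
conf.set(AegisthusInputFormat.KEY_TYPE, "BytesType");   // row key comparator
conf.set(AegisthusInputFormat.COLUMN_TYPE, "UTF8Type"); // column-name comparator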
From source file: com.netflix.aegisthus.input.AegisthusInputFormat.java
License: Apache License

/**
 * The main thing that addSSTableSplit handles is splitting SSTables
 * using their index if available. The general algorithm: if the file is
 * larger than the block size (plus some fuzzy factor), it is broken into
 * multiple splits at offsets taken from the SSTable's index.
 */
public void addSSTableSplit(List<InputSplit> splits, JobContext job, FileStatus file) throws IOException {
    Path path = file.getPath();
    FileSystem fs = path.getFileSystem(job.getConfiguration());
    long length = file.getLen();
    BlockLocation[] blkLocations = fs.getFileBlockLocations(file, 0, length);
    if (length != 0) {
        long blockSize = file.getBlockSize();
        long maxSplitSize = (long) (blockSize * .99);
        long fuzzySplit = (long) (blockSize * 1.2);
        long bytesRemaining = length;
        Iterator<Long> scanner = null;
        Path compressionPath = new Path(path.getParent(),
                path.getName().replaceAll("-Data.db", "-CompressionInfo.db"));
        if (!fs.exists(compressionPath)) {
            // Only initialize if we are going to have more than a single split
            if (fuzzySplit < length) {
                Path indexPath = new Path(path.getParent(), path.getName().replaceAll("-Data.db", "-Index.db"));
                if (!fs.exists(indexPath)) {
                    fuzzySplit = length;
                } else {
                    FSDataInputStream fileIn = fs.open(indexPath);
                    scanner = new OffsetScanner(new DataInputStream(new BufferedInputStream(fileIn)),
                            indexPath.getName());
                }
            }
            long splitStart = 0;
            while (splitStart + fuzzySplit < length && scanner.hasNext()) {
                long splitSize = 0;
                // The scanner returns an offset from the start of the file.
                while (splitSize < maxSplitSize && scanner.hasNext()) {
                    splitSize = scanner.next() - splitStart;
                }
                int blkIndex = getBlockIndex(blkLocations, splitStart + (splitSize / 2));
                LOG.info("split path: " + path.getName() + ":" + splitStart + ":" + splitSize);
                splits.add(new AegSplit(path, splitStart, splitSize, blkLocations[blkIndex].getHosts(),
                        convertors));
                bytesRemaining -= splitSize;
                splitStart += splitSize;
            }
        }
        if (bytesRemaining != 0) {
            LOG.info("end path: " + path.getName() + ":" + (length - bytesRemaining) + ":" + bytesRemaining);
            splits.add(new AegSplit(path, length - bytesRemaining, bytesRemaining,
                    blkLocations[blkLocations.length - 1].getHosts(), convertors,
                    fs.exists(compressionPath), compressionPath));
        }
    } else {
        LOG.info("skipping zero length file: " + path.toString());
    }
}
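To make the fuzzy factor concrete, here is the arithmetic for a standard 128 MB block size; a file must exceed the block size by roughly 20% before index-based splitting kicks in at all:

// Worked example of the thresholds computed above for a 128 MB block.
long blockSize = 128L * 1024 * 1024;          // 134217728 bytes
long maxSplitSize = (long) (blockSize * .99); // 132875550 bytes (~126.7 MB)
long fuzzySplit = (long) (blockSize * 1.2);   // 161061273 bytes (~153.6 MB)
// A 150 MB data file stays a single split (150 MB < fuzzySplit), while a
// 160 MB file is split at index offsets, each split targeting ~maxSplitSize.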