Example usage of org.apache.hadoop.mapreduce.JobContext.getConfiguration()

List of usage examples for org.apache.hadoop.mapreduce.JobContext.getConfiguration()

Introduction

On this page you can find example usages of org.apache.hadoop.mapreduce.JobContext.getConfiguration().

Prototype

public Configuration getConfiguration();

Source Link

Document

Return the configuration for the job.

Usage

From source file:com.jumptap.h2redis.RedisOutputFormat.java

License:Open Source License

/**
 * Verifies that the job configuration carries every setting the Redis output needs.
 *
 * @param context job context whose configuration is inspected
 * @throws IOException if the Redis host is null or empty, or if any of the key,
 *         hash-key or hash-value field settings resolves to -1 (the default
 *         used here when the setting is absent)
 */
@Override
public void checkOutputSpecs(JobContext context) throws IOException, InterruptedException {
    final Configuration jobConf = context.getConfiguration();

    final String redisHost = jobConf.get(RedisDriver.REDIS_HOST);
    final int keyField = jobConf.getInt(RedisDriver.REDIS_KEY_FIELD, -1);
    final int hashKeyField = jobConf.getInt(RedisDriver.REDIS_HASHKEY_FIELD, -1);
    final int hashValField = jobConf.getInt(RedisDriver.REDIS_HASHVAL_FIELD, -1);

    final boolean hostMissing = redisHost == null || redisHost.isEmpty();
    final boolean fieldMissing = keyField == -1 || hashKeyField == -1 || hashValField == -1;

    if (hostMissing || fieldMissing) {
        throw new IOException("Missing configuration param, check usage.");
    }
}

From source file:com.junz.hadoop.custom.SytsLogInputFormat.java

License:Apache License

/**
 * Splits a contiguous id range into one {@link DBInputSplit} per configured map group.
 *
 * <p>The range starts at {@code START_ID_PROPERTY} and covers
 * {@code NUMBER_LOG_PROPERTY} ids; it is divided into {@code NUMBER_MAP_PROPERTY}
 * groups of equal size, with the last group extended to the end of the range so
 * that the remainder of the integer division is not lost. All three settings
 * default to 1 when absent.
 *
 * @param context job context supplying the configuration
 * @return one split per group, in ascending id order
 * @throws IOException if the group count is 0 or if reading the settings or
 *         building a split fails; the original failure is attached as the cause
 */
public List<InputSplit> getSplits(JobContext context) throws IOException, InterruptedException {
    Configuration job = context.getConfiguration();
    List<InputSplit> splits = new ArrayList<InputSplit>();

    try {
        long startId = job.getLong(START_ID_PROPERTY, 1);
        long numberOfIds = job.getLong(NUMBER_LOG_PROPERTY, 1);
        int groups = job.getInt(NUMBER_MAP_PROPERTY, 1);
        if (groups == 0) {
            // Fail with a readable message instead of the bare "/ by zero"
            // the division below would otherwise produce.
            throw new IOException(
                    "Invalid value 0 for " + NUMBER_MAP_PROPERTY + ": at least one map group is required");
        }
        long groupSize = (numberOfIds / groups);

        // Split the rows into n-number of chunks and adjust the last chunk
        // accordingly
        for (int i = 0; i < groups; i++) {
            long lower = i * groupSize + startId;
            boolean lastGroup = (i + 1) == groups;
            // The last group runs to the end of the range to absorb the remainder.
            long upper = lastGroup ? numberOfIds + startId : lower + groupSize;

            splits.add(new DBInputSplit(lower, upper));
        }

        return splits;
    } catch (IOException e) {
        // Already the declared exception type; do not wrap it a second time.
        throw e;
    } catch (Exception e) {
        // Keep the original exception as the cause so the stack trace survives.
        throw new IOException(e.getMessage(), e);
    }
}

From source file:com.knewton.mapreduce.io.SSTableInputFormat.java

License:Apache License

/**
 * Expands all directories passed as input and keeps only valid data tables.
 *
 * <p>The list returned by the superclass is edited in place: each directory
 * entry has its children inserted directly after it, and every entry whose
 * path is rejected by the data table filter is removed. Because children are
 * inserted into the list being iterated, they are visited on later iterations,
 * so nested directories are expanded as well.
 *
 * @param job job context supplying the configuration and the input paths
 * @return A list of all the data tables found under the input directories.
 * @throws IOException propagated from the superclass listing or from listing a
 *         directory's children
 */
@Override
protected List<FileStatus> listStatus(JobContext job) throws IOException {
    Configuration conf = job.getConfiguration();
    List<FileStatus> files = super.listStatus(job);
    DataTablePathFilter dataTableFilter = getDataTableFilter(conf);
    // NOTE(review): presumably strips backup directories, judging by the helper's name; confirm.
    files = cleanUpBackupDir(files);
    // Index-based loop on purpose: the list grows and shrinks while it is traversed.
    for (int i = 0; i < files.size(); i++) {
        FileStatus file = files.get(i);
        Path p = file.getPath();
        // Expand if directory
        if (file.isDirectory() && p != null) {
            LOG.info("Expanding {}", p);
            FileSystem fs = p.getFileSystem(conf);
            FileStatus[] children = fs.listStatus(p);
            List<FileStatus> listChildren = Lists.newArrayList(children);
            listChildren = cleanUpBackupDir(listChildren);
            // Insert right after the current entry so the children are visited next.
            files.addAll(i + 1, listChildren);
        }
        if (!dataTableFilter.accept(file.getPath())) {
            LOG.info("Removing {}", file.getPath());
            files.remove(i);
            // Step back so the element that shifted into slot i is not skipped.
            i--;
        }
    }
    return files;
}

From source file:com.linkedin.cubert.io.avro.PigAvroInputFormatAdaptor.java

License:Open Source License

/**
 * Reports whether input files may be split, as controlled by the
 * {@code cubert.avro.input.unsplittable} job setting.
 *
 * @param context job context supplying the configuration
 * @param filename file under consideration; not consulted by this implementation
 * @return {@code false} when the setting is {@code true}; {@code true} otherwise,
 *         including when the setting is absent
 */
@Override
protected boolean isSplitable(JobContext context, Path filename) {
    final boolean unsplittable = context.getConfiguration().getBoolean("cubert.avro.input.unsplittable", false);
    return !unsplittable;
}

From source file:com.linkedin.cubert.io.avro.PigAvroInputFormatAdaptor.java

License:Open Source License

/**
 * Computes the input splits by handing the job to the delegate obtained
 * for its configuration.
 *
 * @param job job context passed through to the delegate
 * @return the splits produced by the delegate
 * @throws IOException if the delegate fails to compute the splits
 */
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
    final Configuration jobConf = job.getConfiguration();
    return getDelegate(jobConf).getSplits(job);
}

From source file:com.linkedin.cubert.io.CubertInputFormat.java

License:Open Source License

/**
 * Computes the splits for every multi-mapper described by the job's
 * {@link ConfigurationDiff} and tags each split with its mapper index.
 *
 * <p>For each mapper index the corresponding diff is applied, the splits are
 * obtained (through a {@link CombineFileInputFormat} when
 * {@code CubertStrings.COMBINED_INPUT} is set, otherwise through the actual
 * input format), each split is wrapped in a {@link MultiMapperSplit}, and the
 * diff is undone. The undo runs in a {@code finally} block so that a failure
 * while computing splits does not leave the diff applied.
 *
 * @param context job context supplying the configuration
 * @return all splits of all multi-mappers, grouped by ascending mapper index
 * @throws IOException if split computation fails
 * @throws InterruptedException if split computation is interrupted
 */
@Override
public List<InputSplit> getSplits(JobContext context) throws IOException, InterruptedException {
    Configuration conf = context.getConfiguration();
    ConfigurationDiff confDiff = new ConfigurationDiff(conf);

    int numMultiMappers = confDiff.getNumDiffs();

    List<InputSplit> splits = new ArrayList<InputSplit>();

    for (int mapperIndex = 0; mapperIndex < numMultiMappers; mapperIndex++) {
        // reset conf to multimapper i
        confDiff.applyDiff(mapperIndex);
        try {
            // get the actual input format
            InputFormat<K, V> actualInputFormat = getActualInputFormat(context);

            List<InputSplit> actualSplits;

            // check if combined input split is requested; read inside the loop
            // because the flag is looked up after this mapper's diff was applied
            boolean combineSplit = conf.getBoolean(CubertStrings.COMBINED_INPUT, false);

            if (combineSplit) {
                // Only used to compute combined splits; it never reads records.
                CombineFileInputFormat<K, V> cfif = new CombineFileInputFormat<K, V>() {
                    @Override
                    public RecordReader<K, V> createRecordReader(InputSplit split, TaskAttemptContext taskContext)
                            throws IOException {
                        throw new IllegalStateException("Should not be called");
                    }
                };

                // get the splits
                actualSplits = cfif.getSplits(context);
            } else {
                actualSplits = actualInputFormat.getSplits(context);
            }

            // embed each split in MultiMapperSplit and add to list
            for (InputSplit actualSplit : actualSplits) {
                splits.add(new MultiMapperSplit(actualSplit, mapperIndex));
            }
        } finally {
            // undo the diff, even when split computation failed
            confDiff.undoDiff(mapperIndex);
        }
    }
    return splits;
}

From source file:com.linkedin.cubert.io.MultiMapperInputFormat.java

License:Open Source License

/**
 * Computes the splits of every multi-mapper and tags each with its mapper index.
 *
 * <p>For mapper {@code i}, the input directories stored under
 * {@code CubertStrings.MAPRED_INPUT_DIR + i} are copied into
 * {@code mapred.input.dir} before the delegate for that mapper is asked for
 * its splits. After the call, {@code mapred.input.dir} holds the directories
 * of the last mapper processed.
 *
 * @param context job context supplying the configuration
 * @return the splits of all multi-mappers, each wrapped in a {@link MultiMapperSplit}
 * @throws IOException if the input directories of a mapper are not configured,
 *         or if a delegate fails to compute its splits
 * @throws InterruptedException if a delegate is interrupted
 */
@Override
public List<InputSplit> getSplits(JobContext context) throws IOException, InterruptedException {
    Configuration conf = context.getConfiguration();
    int numMultiMappers = conf.getInt(CubertStrings.NUM_MULTI_MAPPERS, 1);
    List<InputSplit> splits = new ArrayList<InputSplit>();

    for (int i = 0; i < numMultiMappers; i++) {
        String inputDirKey = CubertStrings.MAPRED_INPUT_DIR + i;
        String dirs = conf.get(inputDirKey);
        if (dirs == null) {
            // Fail here with the offending property name instead of passing
            // null on to conf.set below.
            throw new IOException("Missing input directories for multi-mapper " + i + ": property '"
                    + inputDirKey + "' is not set");
        }
        conf.set("mapred.input.dir", dirs);

        List<InputSplit> mapperSplits = getDelegate(conf, i).getSplits(context);

        for (InputSplit split : mapperSplits) {
            splits.add(new MultiMapperSplit((FileSplit) split, i));
        }
    }
    return splits;
}

From source file:com.linkedin.cubert.io.rubix.RubixInputFormat.java

License:Open Source License

/**
 * Builds one {@link RubixInputSplit} per key-data entry of every input rubix file.
 *
 * @param job job context supplying the configuration and the input files
 * @return the splits of all input files, in listing order
 * @throws IOException if the files cannot be listed or read, or if the splits of
 *         a file cannot be built; in the latter case the underlying reflective
 *         failure is attached as the cause
 */
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
    List<InputSplit> splits = new ArrayList<InputSplit>();
    List<FileStatus> files = listStatus(job);
    // NOTE(review): the result is unused; kept in case the call is relied on
    // for its side effects. Confirm and remove if not.
    FileSystem.get(job.getConfiguration());

    for (FileStatus file : files) {
        Path path = file.getPath();
        RubixFile<K, V> rubixFile = new RubixFile<K, V>(job.getConfiguration(), path);

        try {
            List<KeyData<K>> keyDataList = rubixFile.getKeyData();
            for (KeyData<K> keyData : keyDataList) {
                InputSplit split = new RubixInputSplit<K, V>(job.getConfiguration(), path, keyData.getKey(),
                        keyData.getOffset(), keyData.getLength(), keyData.getBlockId(), keyData.getNumRecords(),
                        rubixFile.getKeyClass(), rubixFile.getValueClass(), rubixFile.getSchema(),
                        rubixFile.getBlockSerializationType());

                splits.add(split);
            }
        } catch (ClassNotFoundException e) {
            // Swallowing this would silently drop every split of the file.
            throw new IOException("Failed to build input splits for rubix file " + path, e);
        } catch (InstantiationException e) {
            throw new IOException("Failed to build input splits for rubix file " + path, e);
        } catch (IllegalAccessException e) {
            throw new IOException("Failed to build input splits for rubix file " + path, e);
        }
    }

    return splits;
}

From source file:com.linkedin.cubert.io.virtual.VirtualInputFormat.java

License:Open Source License

/**
 * Creates one empty {@link VirtualInputSplit} per configured mapper.
 *
 * @param job job context whose {@code mappers} setting gives the number of splits
 * @return a list holding exactly that many virtual splits
 * @throws RuntimeException if the {@code mappers} setting resolves to -1, the
 *         default used when it is absent
 */
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
    final int mapperCount = job.getConfiguration().getInt("mappers", -1);
    if (mapperCount == -1) {
        throw new RuntimeException("Number of mappers not set for virtual input format.");
    }

    final List<InputSplit> virtualSplits = new ArrayList<InputSplit>(mapperCount);
    for (int remaining = mapperCount; remaining > 0; remaining--) {
        virtualSplits.add(new VirtualInputSplit<K, V>());
    }
    return virtualSplits;
}

From source file:com.linkedin.cubert.pig.piggybank.storage.avro.PigAvroInputFormat.java

License:Apache License

/**
 * Lists the job's input files, passing the superclass listing through
 * {@code MapRedUtil.getAllFileRecursively} together with the job configuration.
 *
 * @param job job context supplying the input paths and the configuration
 * @return the file statuses returned by {@code MapRedUtil.getAllFileRecursively}
 * @throws IOException if listing fails
 */
@Override
protected List<FileStatus> listStatus(JobContext job) throws IOException {
    final List<FileStatus> topLevel = super.listStatus(job);
    return MapRedUtil.getAllFileRecursively(topLevel, job.getConfiguration());
}