Example usage for org.apache.hadoop.mapreduce.JobContext#getConfiguration()

Introduction

This page collects example usages of the getConfiguration() method of org.apache.hadoop.mapreduce.JobContext.

Prototype

public Configuration getConfiguration();

Source Link

Document

Return the configuration for the job.

Usage

From source file:com.jumptap.h2redis.RedisOutputFormat.java

License:Open Source License

/**
 * Verifies that the job configuration carries every Redis setting this
 * output format reads before the job is allowed to proceed.
 *
 * @param context job context whose configuration is inspected
 * @throws IOException if the Redis host is absent or empty, or if any of the
 *         key / hash-key / hash-value field settings is still at its -1 default
 */
@Override
public void checkOutputSpecs(JobContext context) throws IOException, InterruptedException {
    Configuration conf = context.getConfiguration();

    String redisHost = conf.get(RedisDriver.REDIS_HOST);
    int keyField = conf.getInt(RedisDriver.REDIS_KEY_FIELD, -1);
    int hashKeyField = conf.getInt(RedisDriver.REDIS_HASHKEY_FIELD, -1);
    int hashValField = conf.getInt(RedisDriver.REDIS_HASHVAL_FIELD, -1);

    boolean hostPresent = redisHost != null && !redisHost.isEmpty();
    boolean fieldsPresent = keyField != -1 && hashKeyField != -1 && hashValField != -1;

    if (!(hostPresent && fieldsPresent)) {
        throw new IOException("Missing configuration param, check usage.");
    }
}

From source file:com.junz.hadoop.custom.SytsLogInputFormat.java

License:Apache License

/**
 * Divides the configured id range into contiguous chunks, one
 * {@code DBInputSplit} per chunk.
 *
 * <p>The range starts at {@code START_ID_PROPERTY} and covers
 * {@code NUMBER_LOG_PROPERTY} ids; the number of chunks comes from
 * {@code NUMBER_MAP_PROPERTY}. All three default to 1 when unset. Every chunk
 * spans {@code numberOfIds / groups} ids except the last one, which is
 * stretched to the end of the range so the integer-division remainder is not
 * lost.
 *
 * @param context job context supplying the configuration
 * @return the splits, in ascending id order
 * @throws IOException if reading the settings or building the splits fails
 *         (for example a chunk count of zero); the original failure is
 *         attached as the cause
 */
public List<InputSplit> getSplits(JobContext context) throws IOException, InterruptedException {
    Configuration job = context.getConfiguration();
    List<InputSplit> splits = new ArrayList<InputSplit>();

    try {
        long startId = job.getLong(START_ID_PROPERTY, 1);
        long numberOfIds = job.getLong(NUMBER_LOG_PROPERTY, 1);
        int groups = job.getInt(NUMBER_MAP_PROPERTY, 1);
        long groupSize = (numberOfIds / groups);

        for (int i = 0; i < groups; i++) {
            long chunkStart = i * groupSize + startId;
            // The last chunk runs to the end of the id range so it absorbs
            // whatever the integer division above left over.
            long chunkEnd = ((i + 1) == groups) ? numberOfIds + startId : chunkStart + groupSize;

            splits.add(new DBInputSplit(chunkStart, chunkEnd));
        }

        return splits;
    } catch (Exception e) {
        // Keep the original exception as the cause so its type and stack
        // trace are not lost when it is reported as an IOException.
        throw new IOException(e.getMessage(), e);
    }
}

From source file:com.knewton.mapreduce.io.SSTableInputFormat.java

License:Apache License

/**
 * Expands all directories passed as input and keeps only valid data tables.
 *
 * <p>Starts from the listing produced by {@code super.listStatus(job)}, passes
 * it through {@code cleanUpBackupDir}, and then walks the list in place:
 * directory entries have their (cleaned) children spliced in right behind
 * them, and every entry whose path the data table filter rejects is dropped.
 *
 * @param job job context; its configuration is used to build the data table
 *        filter and to resolve the file system of each directory
 * @return A list of all the data tables found under the input directories.
 * @throws IOException if the parent listing or a directory listing fails
 */
@Override
protected List<FileStatus> listStatus(JobContext job) throws IOException {
    Configuration conf = job.getConfiguration();
    List<FileStatus> files = super.listStatus(job);
    DataTablePathFilter dataTableFilter = getDataTableFilter(conf);
    files = cleanUpBackupDir(files);
    // Index-based loop on purpose: the list grows and shrinks while it is
    // being walked, and files.size() is re-read on every iteration.
    for (int i = 0; i < files.size(); i++) {
        FileStatus file = files.get(i);
        Path p = file.getPath();
        // Expand if directory
        if (file.isDirectory() && p != null) {
            LOG.info("Expanding {}", p);
            FileSystem fs = p.getFileSystem(conf);
            FileStatus[] children = fs.listStatus(p);
            List<FileStatus> listChildren = Lists.newArrayList(children);
            listChildren = cleanUpBackupDir(listChildren);
            // Children land directly after the current entry, so the following
            // iterations visit them (and expand nested directories in turn).
            files.addAll(i + 1, listChildren);
        }
        // NOTE(review): presumably the filter rejects directory paths, which is
        // how an expanded directory itself leaves the list - confirm against
        // DataTablePathFilter.
        if (!dataTableFilter.accept(file.getPath())) {
            LOG.info("Removing {}", file.getPath());
            files.remove(i);
            // Step back so the entry that just shifted into slot i is not skipped.
            i--;
        }
    }
    return files;
}

From source file:com.linkedin.cubert.io.avro.PigAvroInputFormatAdaptor.java

License:Open Source License

/**
 * Tells the framework whether an input file may be broken into several splits.
 *
 * @param context job context supplying the configuration
 * @param filename file being considered (not consulted by this implementation)
 * @return {@code false} only when {@code cubert.avro.input.unsplittable} is
 *         set to true in the job configuration; {@code true} otherwise
 */
@Override
protected boolean isSplitable(JobContext context, Path filename) {
    boolean unsplittable = context.getConfiguration().getBoolean("cubert.avro.input.unsplittable", false);
    return !unsplittable;
}

From source file:com.linkedin.cubert.io.avro.PigAvroInputFormatAdaptor.java

License:Open Source License

/**
 * Computes the input splits by handing the job to the delegate input format
 * obtained from {@code getDelegate} for the job's configuration.
 *
 * @param job job context forwarded unchanged to the delegate
 * @return the splits reported by the delegate
 * @throws IOException if the delegate fails to compute its splits
 */
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
    List<InputSplit> delegateSplits = getDelegate(job.getConfiguration()).getSplits(job);
    return delegateSplits;
}

From source file:com.linkedin.cubert.io.CubertInputFormat.java

License:Open Source License

/**
 * Computes the splits of every multi-mapper and tags each one with the index
 * of the mapper it belongs to.
 *
 * <p>For each diff held by the {@code ConfigurationDiff}, the diff is applied
 * to the job configuration, the splits are computed (through an anonymous
 * {@code CombineFileInputFormat} when {@code CubertStrings.COMBINED_INPUT} is
 * true, otherwise through the actual input format), each split is wrapped in
 * a {@code MultiMapperSplit}, and the diff is undone again.
 *
 * @param context job context whose configuration is temporarily modified
 * @return all wrapped splits, grouped by ascending mapper index
 * @throws IOException if split computation fails
 * @throws InterruptedException if split computation is interrupted
 */
@Override
public List<InputSplit> getSplits(JobContext context) throws IOException, InterruptedException {
    Configuration conf = context.getConfiguration();
    ConfigurationDiff confDiff = new ConfigurationDiff(conf);

    int numMultiMappers = confDiff.getNumDiffs();

    List<InputSplit> splits = new ArrayList<InputSplit>();

    for (int mapperIndex = 0; mapperIndex < numMultiMappers; mapperIndex++) {
        // reset conf to multimapper i
        confDiff.applyDiff(mapperIndex);

        try {
            // get the actual input format
            InputFormat<K, V> actualInputFormat = getActualInputFormat(context);

            List<InputSplit> actualSplits = null;

            // check if combined input split is requested; read after the diff
            // is applied because the value is looked up per mapper
            boolean combineSplit = conf.getBoolean(CubertStrings.COMBINED_INPUT, false);

            if (combineSplit) {
                // Create CombinedFileInputFormat; it is used for split
                // computation only, so its record reader must never be asked for
                CombineFileInputFormat<K, V> cfif = new CombineFileInputFormat<K, V>() {
                    @Override
                    public RecordReader<K, V> createRecordReader(InputSplit split, TaskAttemptContext taskContext)
                            throws IOException {
                        throw new IllegalStateException("Should not be called");
                    }
                };

                // get the splits
                actualSplits = cfif.getSplits(context);
            } else {
                actualSplits = actualInputFormat.getSplits(context);
            }

            // embed each split in MultiMapperSplit and add to list
            for (InputSplit actualSplit : actualSplits) {
                splits.add(new MultiMapperSplit(actualSplit, mapperIndex));
            }
        } finally {
            // undo the diff even when split computation throws, so the shared
            // job configuration is never left in a per-mapper state
            confDiff.undoDiff(mapperIndex);
        }
    }
    return splits;
}

From source file:com.linkedin.cubert.io.MultiMapperInputFormat.java

License:Open Source License

/**
 * Collects the splits of every multi-mapper, wrapping each one in a
 * {@code MultiMapperSplit} that records the mapper index.
 *
 * <p>For mapper {@code n}, the value stored under
 * {@code CubertStrings.MAPRED_INPUT_DIR + n} is copied into
 * {@code mapred.input.dir} before that mapper's delegate is asked for its
 * splits. The number of mappers is read from
 * {@code CubertStrings.NUM_MULTI_MAPPERS} and defaults to 1.
 *
 * @param context job context; its configuration is modified as described
 * @return the wrapped splits, grouped by ascending mapper index
 * @throws IOException if a delegate fails to compute its splits
 * @throws InterruptedException if split computation is interrupted
 */
@Override
public List<InputSplit> getSplits(JobContext context) throws IOException, InterruptedException {
    Configuration conf = context.getConfiguration();
    List<InputSplit> wrappedSplits = new ArrayList<InputSplit>();
    int mapperCount = conf.getInt(CubertStrings.NUM_MULTI_MAPPERS, 1);

    for (int mapperIndex = 0; mapperIndex < mapperCount; mapperIndex++) {
        // Point the shared input-dir setting at this mapper's directories.
        conf.set("mapred.input.dir", conf.get(CubertStrings.MAPRED_INPUT_DIR + mapperIndex));

        List<InputSplit> delegateSplits = getDelegate(context.getConfiguration(), mapperIndex)
                .getSplits(context);

        for (InputSplit delegateSplit : delegateSplits) {
            FileSplit fileSplit = (FileSplit) delegateSplit;
            wrappedSplits.add(new MultiMapperSplit(fileSplit, mapperIndex));
        }
    }
    return wrappedSplits;
}

From source file:com.linkedin.cubert.io.rubix.RubixInputFormat.java

License:Open Source License

/**
 * Creates one {@code RubixInputSplit} for every key-data entry of every file
 * returned by {@code listStatus(job)}.
 *
 * @param job job context supplying the configuration and the input listing
 * @return the splits, in file order and, within a file, in key-data order
 * @throws IOException if the input cannot be listed, or if the key data of a
 *         file cannot be loaded; the underlying reflection failure is attached
 *         as the cause instead of being printed and ignored, so a broken file
 *         can no longer silently drop its splits from the job
 */
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
    List<InputSplit> splits = new ArrayList<InputSplit>();
    List<FileStatus> files = listStatus(job);
    // NOTE(review): the returned FileSystem is discarded; this looks like a
    // leftover. Kept so any failure it raises still surfaces here - confirm
    // and remove.
    FileSystem.get(job.getConfiguration());

    for (FileStatus file : files) {
        Path path = file.getPath();
        RubixFile<K, V> rubixFile = new RubixFile<K, V>(job.getConfiguration(), path);

        try {
            List<KeyData<K>> keyDataList = rubixFile.getKeyData();
            for (KeyData<K> keyData : keyDataList) {
                InputSplit split = new RubixInputSplit<K, V>(job.getConfiguration(), path, keyData.getKey(),
                        keyData.getOffset(), keyData.getLength(), keyData.getBlockId(), keyData.getNumRecords(),
                        rubixFile.getKeyClass(), rubixFile.getValueClass(), rubixFile.getSchema(),
                        rubixFile.getBlockSerializationType());

                splits.add(split);
            }
        } catch (ClassNotFoundException e) {
            throw new IOException("Unable to load key data from " + path, e);
        } catch (InstantiationException e) {
            throw new IOException("Unable to load key data from " + path, e);
        } catch (IllegalAccessException e) {
            throw new IOException("Unable to load key data from " + path, e);
        }
    }

    return splits;
}

From source file:com.linkedin.cubert.io.virtual.VirtualInputFormat.java

License:Open Source License

/**
 * Produces as many empty {@code VirtualInputSplit}s as the {@code mappers}
 * setting of the job configuration requests.
 *
 * @param job job context supplying the configuration
 * @return a list holding one virtual split per requested mapper
 * @throws RuntimeException if {@code mappers} is not set (reads back as -1)
 */
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
    final int mapperCount = job.getConfiguration().getInt("mappers", -1);
    if (mapperCount == -1) {
        throw new RuntimeException("Number of mappers not set for virtual input format.");
    }

    List<InputSplit> virtualSplits = new ArrayList<InputSplit>(mapperCount);
    while (virtualSplits.size() < mapperCount) {
        virtualSplits.add(new VirtualInputSplit<K, V>());
    }
    return virtualSplits;
}

From source file:com.linkedin.cubert.pig.piggybank.storage.avro.PigAvroInputFormat.java

License:Apache License

/**
 * Lists the job input by taking the parent class's listing and passing it,
 * together with the job configuration, to
 * {@code MapRedUtil.getAllFileRecursively}.
 *
 * @param job job context supplying the input paths and configuration
 * @return whatever {@code MapRedUtil.getAllFileRecursively} returns for the
 *         parent listing
 * @throws IOException if listing fails
 */
@Override
protected List<FileStatus> listStatus(JobContext job) throws IOException {
    List<FileStatus> topLevel = super.listStatus(job);
    return MapRedUtil.getAllFileRecursively(topLevel, job.getConfiguration());
}