List of usage examples for org.apache.hadoop.mapreduce.JobContext#getConfiguration()
/**
 * Accessor whose usages are listed below; it takes no arguments and yields a
 * {@code Configuration}. The examples that follow read job settings from the returned
 * object (e.g. {@code get}, {@code getInt}, {@code getLong}, {@code getBoolean}).
 *
 * @return the configuration object (presumably the one attached to the job — confirm
 *         against the declaring type's documentation)
 */
public Configuration getConfiguration();
From source file:com.jumptap.h2redis.RedisOutputFormat.java
License:Open Source License
/**
 * Verifies that the job configuration contains every Redis setting read here.
 *
 * <p>The host must be present and non-empty, and the key, hash-key and hash-value field
 * settings must each differ from the {@code -1} default used when a setting is absent.
 *
 * @param context job context whose configuration is inspected
 * @throws IOException if any of the settings above is missing
 */
@Override
public void checkOutputSpecs(JobContext context) throws IOException, InterruptedException {
    final Configuration settings = context.getConfiguration();

    final String redisHost = settings.get(RedisDriver.REDIS_HOST);
    final int keyField = settings.getInt(RedisDriver.REDIS_KEY_FIELD, -1);
    final int hashKeyField = settings.getInt(RedisDriver.REDIS_HASHKEY_FIELD, -1);
    final int hashValueField = settings.getInt(RedisDriver.REDIS_HASHVAL_FIELD, -1);

    final boolean hostPresent = redisHost != null && !redisHost.isEmpty();
    final boolean fieldsPresent = keyField != -1 && hashKeyField != -1 && hashValueField != -1;

    if (!(hostPresent && fieldsPresent)) {
        throw new IOException("Missing configuration param, check usage.");
    }
}
From source file:com.junz.hadoop.custom.SytsLogInputFormat.java
License:Apache License
public List<InputSplit> getSplits(JobContext context) throws IOException, InterruptedException { Configuration job = context.getConfiguration(); List<InputSplit> splits = new ArrayList<InputSplit>(); try {/*from w ww . jav a 2s.co m*/ long startId = job.getLong(START_ID_PROPERTY, 1); long numberOfIds = job.getLong(NUMBER_LOG_PROPERTY, 1); int groups = job.getInt(NUMBER_MAP_PROPERTY, 1); long groupSize = (numberOfIds / groups); // Split the rows into n-number of chunks and adjust the last chunk // accordingly for (int i = 0; i < groups; i++) { DBInputSplit split; if ((i + 1) == groups) split = new DBInputSplit(i * groupSize + startId, numberOfIds + startId); else split = new DBInputSplit(i * groupSize + startId, (i * groupSize) + groupSize + startId); splits.add(split); } return splits; } catch (Exception e) { throw new IOException(e.getMessage()); } }
From source file:com.knewton.mapreduce.io.SSTableInputFormat.java
License:Apache License
/** * Expands all directories passed as input and keeps only valid data tables. * * @return A list of all the data tables found under the input directories. *///from w w w. j a v a 2s . c o m @Override protected List<FileStatus> listStatus(JobContext job) throws IOException { Configuration conf = job.getConfiguration(); List<FileStatus> files = super.listStatus(job); DataTablePathFilter dataTableFilter = getDataTableFilter(conf); files = cleanUpBackupDir(files); for (int i = 0; i < files.size(); i++) { FileStatus file = files.get(i); Path p = file.getPath(); // Expand if directory if (file.isDirectory() && p != null) { LOG.info("Expanding {}", p); FileSystem fs = p.getFileSystem(conf); FileStatus[] children = fs.listStatus(p); List<FileStatus> listChildren = Lists.newArrayList(children); listChildren = cleanUpBackupDir(listChildren); files.addAll(i + 1, listChildren); } if (!dataTableFilter.accept(file.getPath())) { LOG.info("Removing {}", file.getPath()); files.remove(i); i--; } } return files; }
From source file:com.linkedin.cubert.io.avro.PigAvroInputFormatAdaptor.java
License:Open Source License
/**
 * Reports whether input files may be split, based on the
 * {@code cubert.avro.input.unsplittable} flag in the job configuration.
 *
 * @param context  job context supplying the configuration
 * @param filename file being considered (not consulted here)
 * @return {@code false} when the flag is set to true; {@code true} otherwise, including
 *         when the flag is absent
 */
@Override
protected boolean isSplitable(JobContext context, Path filename) {
    final boolean unsplittable =
            context.getConfiguration().getBoolean("cubert.avro.input.unsplittable", false);
    return !unsplittable;
}
From source file:com.linkedin.cubert.io.avro.PigAvroInputFormatAdaptor.java
License:Open Source License
/**
 * Computes the input splits by handing the job to the delegate obtained from
 * {@code getDelegate} for this job's configuration.
 *
 * @param job job context passed through to the delegate
 * @return whatever splits the delegate produces
 * @throws IOException if the delegate fails
 */
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
    final List<InputSplit> delegateSplits = getDelegate(job.getConfiguration()).getSplits(job);
    return delegateSplits;
}
From source file:com.linkedin.cubert.io.CubertInputFormat.java
License:Open Source License
@Override public List<InputSplit> getSplits(JobContext context) throws IOException, InterruptedException { Configuration conf = context.getConfiguration(); ConfigurationDiff confDiff = new ConfigurationDiff(conf); int numMultiMappers = confDiff.getNumDiffs(); List<InputSplit> splits = new ArrayList<InputSplit>(); for (int mapperIndex = 0; mapperIndex < numMultiMappers; mapperIndex++) { // reset conf to multimapper i confDiff.applyDiff(mapperIndex); // get the actual input format InputFormat<K, V> actualInputFormat = getActualInputFormat(context); List<InputSplit> actualSplits = null; // check if combined input split is requested boolean combineSplit = conf.getBoolean(CubertStrings.COMBINED_INPUT, false); if (combineSplit) { // Create CombinedFileInputFormat CombineFileInputFormat<K, V> cfif = new CombineFileInputFormat<K, V>() { @Override// w w w.j ava 2 s.c om public RecordReader<K, V> createRecordReader(InputSplit split, TaskAttemptContext context) throws IOException { throw new IllegalStateException("Should not be called"); } }; // get the splits actualSplits = cfif.getSplits(context); } else { actualSplits = actualInputFormat.getSplits(context); } // embed each split in MultiMapperSplit and add to list for (InputSplit actualSplit : actualSplits) splits.add(new MultiMapperSplit(actualSplit, mapperIndex)); // undo the diff confDiff.undoDiff(mapperIndex); } return splits; }
From source file:com.linkedin.cubert.io.MultiMapperInputFormat.java
License:Open Source License
/**
 * Gathers the splits of every multi-mapper input and wraps each one in a
 * {@code MultiMapperSplit} carrying the mapper index.
 *
 * <p>For each index below the configured mapper count (default {@code 1}), the value
 * stored under {@code CubertStrings.MAPRED_INPUT_DIR + index} is copied into
 * {@code mapred.input.dir} before the delegate for that index is asked for its splits.
 *
 * @param context job context supplying (and receiving updates to) the configuration
 * @return the wrapped splits of all mappers, in mapper-index order
 */
@Override
public List<InputSplit> getSplits(JobContext context) throws IOException, InterruptedException {
    final Configuration conf = context.getConfiguration();
    final int mapperCount = conf.getInt(CubertStrings.NUM_MULTI_MAPPERS, 1);
    final List<InputSplit> collected = new ArrayList<InputSplit>();

    int mapperIndex = 0;
    while (mapperIndex < mapperCount) {
        final String inputDirs = conf.get(CubertStrings.MAPRED_INPUT_DIR + mapperIndex);
        conf.set("mapred.input.dir", inputDirs);

        final List<InputSplit> fromDelegate =
                getDelegate(context.getConfiguration(), mapperIndex).getSplits(context);
        for (InputSplit delegateSplit : fromDelegate) {
            collected.add(new MultiMapperSplit((FileSplit) delegateSplit, mapperIndex));
        }
        mapperIndex++;
    }
    return collected;
}
From source file:com.linkedin.cubert.io.rubix.RubixInputFormat.java
License:Open Source License
@Override public List<InputSplit> getSplits(JobContext job) throws IOException { List<InputSplit> splits = new ArrayList<InputSplit>(); List<FileStatus> files = listStatus(job); FileSystem.get(job.getConfiguration()); for (FileStatus file : files) { Path path = file.getPath(); RubixFile<K, V> rubixFile = new RubixFile<K, V>(job.getConfiguration(), path); List<KeyData<K>> keyDataList; try {//from w w w. ja v a2 s . c o m keyDataList = rubixFile.getKeyData(); for (KeyData<K> keyData : keyDataList) { InputSplit split = new RubixInputSplit<K, V>(job.getConfiguration(), path, keyData.getKey(), keyData.getOffset(), keyData.getLength(), keyData.getBlockId(), keyData.getNumRecords(), rubixFile.getKeyClass(), rubixFile.getValueClass(), rubixFile.getSchema(), rubixFile.getBlockSerializationType()); splits.add(split); } } catch (ClassNotFoundException e) { e.printStackTrace(); } catch (InstantiationException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (IllegalAccessException e) { // TODO Auto-generated catch block e.printStackTrace(); } } return splits; }
From source file:com.linkedin.cubert.io.virtual.VirtualInputFormat.java
License:Open Source License
/**
 * Produces one empty {@code VirtualInputSplit} per mapper requested through the
 * {@code mappers} configuration entry.
 *
 * @param job job context supplying the configuration
 * @return a list holding as many virtual splits as mappers were requested
 * @throws RuntimeException if {@code mappers} is not set (reads back as {@code -1})
 */
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
    final int mapperCount = job.getConfiguration().getInt("mappers", -1);
    if (mapperCount == -1) {
        throw new RuntimeException("Number of mappers not set for virtual input format.");
    }

    final List<InputSplit> virtualSplits = new ArrayList<InputSplit>(mapperCount);
    int remaining = mapperCount;
    while (remaining > 0) {
        virtualSplits.add(new VirtualInputSplit<K, V>());
        remaining--;
    }
    return virtualSplits;
}
From source file:com.linkedin.cubert.pig.piggybank.storage.avro.PigAvroInputFormat.java
License:Apache License
/**
 * Lists the job's input files by passing the superclass listing through
 * {@code MapRedUtil.getAllFileRecursively} together with the job configuration.
 *
 * @param job job context supplying the input paths and configuration
 * @return the file statuses returned by {@code MapRedUtil.getAllFileRecursively}
 * @throws IOException if listing fails
 */
@Override
protected List<FileStatus> listStatus(JobContext job) throws IOException {
    final List<FileStatus> topLevelEntries = super.listStatus(job);
    return MapRedUtil.getAllFileRecursively(topLevelEntries, job.getConfiguration());
}