List of usage examples for org.apache.hadoop.mapreduce.JobContext.getConfiguration()
public Configuration getConfiguration();
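The method hands back the job's Configuration, which input/output formats typically read inside getSplits(), isSplitable(), or checkOutputSpecs(). Before the project examples below, here is a minimal sketch of the common pattern; the property name "example.lines.per.map" is made up for illustration.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.JobContext;

public class ConfigurationReadExample {
    public static int readSetting(JobContext context) {
        // JobContext exposes the job's Configuration; getInt falls back to
        // the supplied default (here 1) when the key is unset.
        Configuration conf = context.getConfiguration();
        return conf.getInt("example.lines.per.map", 1);
    }
}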
From source file:bucket_sort.NLineInputFormat.java
License:Apache License
/**
 * Logically splits the set of input files for the job, splits N lines
 * of the input as one split.
 *
 * @see FileInputFormat#getSplits(JobContext)
 */
public List<InputSplit> getSplits(JobContext job) throws IOException {
    List<InputSplit> splits = new ArrayList<InputSplit>();
    int numLinesPerSplit = getNumLinesPerSplit(job);
    for (FileStatus status : listStatus(job)) {
        splits.addAll(getSplitsForFile(status, job.getConfiguration(), numLinesPerSplit));
    }
    return splits;
}
From source file:bucket_sort.NLineInputFormat.java
License:Apache License
/**
 * Get the number of lines per split.
 *
 * @param job the job
 * @return the number of lines per split
 */
public static int getNumLinesPerSplit(JobContext job) {
    return job.getConfiguration().getInt(LINES_PER_MAP, 1);
}
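A driver-side counterpart, as a sketch: set the value that getNumLinesPerSplit() reads back. The key below is the one used by Hadoop's bundled NLineInputFormat; whether this project's LINES_PER_MAP constant resolves to the same string is an assumption.

import org.apache.hadoop.mapreduce.Job;

public class NLineDriverSketch {
    public static void configure(Job job) {
        // Assumed key: Hadoop's stock NLineInputFormat reads this property;
        // bucket_sort's LINES_PER_MAP may or may not hold the same string.
        job.getConfiguration().setInt("mapreduce.input.lineinputformat.linespermap", 100);
        job.setInputFormatClass(bucket_sort.NLineInputFormat.class);
    }
}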
From source file:ca.uwaterloo.iss4e.hadoop.io.CartesianInputFormat.java
License:Open Source License
private List<InputSplit> getInputSplits(JobContext jobContext, String inputFormatClass, Path path)
        throws ClassNotFoundException, IOException {
    Configuration conf = jobContext.getConfiguration();
    FileInputFormat inputFormat = (FileInputFormat) ReflectionUtils.newInstance(Class.forName(inputFormatClass), conf);

    // Set the input path for this data set
    path = path.getFileSystem(conf).makeQualified(path);
    String dirStr = StringUtils.escapeString(path.toString());
    conf.set(INPUT_DIR, dirStr);

    return inputFormat.getSplits(jobContext);
}
From source file:ca.uwaterloo.iss4e.hadoop.io.CartesianInputFormat.java
License:Open Source License
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
    try {
        // Get the input splits from both the left and right data sets
        Configuration conf = job.getConfiguration();
        List<InputSplit> leftSplits = getInputSplits(job, conf.get(LEFT_INPUT_FORMAT),
                new Path(conf.get(LEFT_INPUT_PATH)));
        List<InputSplit> rightSplits = getInputSplits(job, conf.get(RIGHT_INPUT_FORMAT),
                new Path(conf.get(RIGHT_INPUT_PATH)));

        // Create the CompositeInputSplits: one per (left, right) pair,
        // so leftSplits.size() * rightSplits.size() splits in total
        List<InputSplit> compositeInputSplits = new ArrayList<InputSplit>();
        for (InputSplit left : leftSplits) {
            for (InputSplit right : rightSplits) {
                CompositeInputSplit returnSplits = new CompositeInputSplit(2);
                returnSplits.add(left);
                returnSplits.add(right);
                compositeInputSplits.add(returnSplits);
            }
        }

        LOG.info("Total CompositeSplits to process: " + compositeInputSplits.size());
        return compositeInputSplits;
    } catch (ClassNotFoundException | InterruptedException e) {
        throw new IOException(e);
    }
}
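A driver might wire up this input format roughly as follows. The key strings below are placeholders standing in for the LEFT_/RIGHT_INPUT_FORMAT and LEFT_/RIGHT_INPUT_PATH constants read in getSplits() above; the real values are defined in CartesianInputFormat.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;

public class CartesianDriverSketch {
    public static void configure(Job job) {
        Configuration conf = job.getConfiguration();
        // Placeholder keys; substitute the constants from CartesianInputFormat.
        conf.set("cartesian.left.inputformat", TextInputFormat.class.getName());
        conf.set("cartesian.left.path", "/data/left");
        conf.set("cartesian.right.inputformat", TextInputFormat.class.getName());
        conf.set("cartesian.right.path", "/data/right");
    }
}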
From source file:cloudbase.core.client.mapreduce.CloudbaseOutputFormatShim.java
License:Apache License
protected static Instance getInstance(JobContext job) {
    Configuration conf = job.getConfiguration();
    if (isMock(conf)) {
        return new MockInstanceShim(conf.get(INSTANCE_NAME));
    }
    return CloudbaseOutputFormat.getInstance(job);
}
From source file:cloudbase.core.client.mapreduce.CloudbaseOutputFormatShim.java
License:Apache License
public static void setMockInstance(JobContext job, String instanceName) {
    Configuration conf = job.getConfiguration();
    conf.setBoolean(INSTANCE_HAS_BEEN_SET, true);
    conf.setBoolean(MOCK, true);
    conf.set(INSTANCE_NAME, instanceName);
}
From source file:cloudbase.core.client.mapreduce.CloudbaseOutputFormatShim.java
License:Apache License
@Override
public void checkOutputSpecs(JobContext job) throws IOException {
    if (isMock(job.getConfiguration())) {
        try {
            Connector c = CloudbaseOutputFormatShim.getInstance(job)
                    .getConnector(getUsername(job), getPassword(job));
            if (!c.securityOperations().authenticateUser(getUsername(job), getPassword(job))) {
                throw new IOException("Unable to authenticate user");
            }
        } catch (CBException e) {
            throw new IOException(e);
        } catch (CBSecurityException e) {
            throw new IOException(e);
        }
    } else {
        super.checkOutputSpecs(job);
    }
}
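In a test, the mock route might be exercised like this sketch; it assumes a Hadoop 2.x-style Job.getInstance() (older APIs construct Job directly) and that credentials are configured elsewhere before checkOutputSpecs() runs.

import org.apache.hadoop.mapreduce.Job;
import cloudbase.core.client.mapreduce.CloudbaseOutputFormatShim;

public class MockOutputTestSketch {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(); // Hadoop 2.x style; older APIs use new Job(conf)
        // Point the shim at an in-memory mock instance; a later call to
        // checkOutputSpecs(job) then authenticates against the mock rather
        // than a live Cloudbase cluster.
        CloudbaseOutputFormatShim.setMockInstance(job, "testInstance");
    }
}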
From source file:cn.uc.hadoop.mapreduce.lib.input.FileNameTextInputFormat.java
License:Apache License
@Override
protected boolean isSplitable(JobContext context, Path file) {
    final CompressionCodec codec = new CompressionCodecFactory(context.getConfiguration()).getCodec(file);
    if (null == codec) {
        // Uncompressed files can always be split
        return true;
    }
    return codec instanceof SplittableCompressionCodec;
}
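The same check works outside an InputFormat when only a plain Configuration is at hand, e.g. in a driver deciding whether an input will parallelize; a minimal standalone sketch:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;
import org.apache.hadoop.io.compress.SplittableCompressionCodec;

public class SplittabilityCheck {
    public static boolean isSplittable(Configuration conf, Path file) {
        CompressionCodec codec = new CompressionCodecFactory(conf).getCodec(file);
        // No codec means the file is uncompressed and can be split anywhere;
        // otherwise only codecs such as bzip2 that implement
        // SplittableCompressionCodec allow mid-file split points.
        return codec == null || codec instanceof SplittableCompressionCodec;
    }
}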
From source file:co.cask.cdap.data.stream.AbstractStreamInputFormat.java
License:Apache License
@Override
public List<InputSplit> getSplits(JobContext context) throws IOException, InterruptedException {
    Configuration conf = context.getConfiguration();
    long ttl = conf.getLong(STREAM_TTL, Long.MAX_VALUE);
    long endTime = conf.getLong(EVENT_END_TIME, Long.MAX_VALUE);
    long startTime = Math.max(conf.getLong(EVENT_START_TIME, 0L), getCurrentTime() - ttl);
    long maxSplitSize = conf.getLong(MAX_SPLIT_SIZE, Long.MAX_VALUE);
    long minSplitSize = Math.min(conf.getLong(MIN_SPLIT_SIZE, 1L), maxSplitSize);

    StreamInputSplitFinder<InputSplit> splitFinder = StreamInputSplitFinder
            .builder(URI.create(conf.get(STREAM_PATH)))
            .setStartTime(startTime)
            .setEndTime(endTime)
            .setMinSplitSize(minSplitSize)
            .setMaxSplitSize(maxSplitSize)
            .build(splitFactory);
    return splitFinder.getSplits(conf);
}
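The split bounds above all come from job configuration, so a driver would populate them roughly as in this sketch. Every key string below is a placeholder for the STREAM_PATH, EVENT_*, *_SPLIT_SIZE, and STREAM_TTL constants read in getSplits(); the real strings are defined in AbstractStreamInputFormat.

import org.apache.hadoop.conf.Configuration;

public class StreamInputConfigSketch {
    public static void configure(Configuration conf) {
        // Placeholder keys; substitute the constants from AbstractStreamInputFormat.
        long now = System.currentTimeMillis();
        conf.set("stream.path", "stream:///streams/events");
        conf.setLong("event.start.time", now - 60 * 60 * 1000L); // last hour
        conf.setLong("event.end.time", now);
        conf.setLong("min.split.size", 1L);
        conf.setLong("max.split.size", 128L * 1024 * 1024);      // 128 MB
    }
}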
From source file:co.cask.cdap.etl.batch.mapreduce.MapReduceTransformExecutorFactory.java
License:Apache License
public MapReduceTransformExecutorFactory(MapReduceTaskContext taskContext,
        PipelinePluginInstantiator pluginInstantiator, Metrics metrics,
        Map<String, Map<String, String>> pluginRuntimeArgs) {
    super(pluginInstantiator, metrics);
    this.taskContext = taskContext;
    this.pluginRuntimeArgs = pluginRuntimeArgs;

    JobContext hadoopContext = (JobContext) taskContext.getHadoopContext();
    Configuration hConf = hadoopContext.getConfiguration();
    this.mapOutputKeyClassName = hConf.get(ETLMapReduce.GROUP_KEY_CLASS);
    this.mapOutputValClassName = hConf.get(ETLMapReduce.GROUP_VAL_CLASS);
    this.isMapper = hadoopContext instanceof Mapper.Context;
}