List of usage examples for org.apache.hadoop.mapreduce JobContext getConfiguration
/** Returns the {@code Configuration} object associated with this job context. */
public Configuration getConfiguration();
From source file:com.uber.hoodie.hadoop.HoodieHiveUtil.java
License:Apache License
/**
 * Looks up the start commit time configured for the given table.
 *
 * <p>The property name is produced by formatting {@code HOODIE_START_COMMIT_PATTERN} with the
 * table name. The value is read from the job configuration exactly once, so the value that is
 * logged is always the value that is returned.
 *
 * @param job job context whose configuration is queried
 * @param tableName table name substituted into the property-name pattern
 * @return the value stored under the derived property name; no default is supplied by this method
 */
public static String readStartCommitTime(JobContext job, String tableName) {
    final String startCommitTimestampName = String.format(HOODIE_START_COMMIT_PATTERN, tableName);
    final String startCommitTime = job.getConfiguration().get(startCommitTimestampName);
    LOG.info("Read start commit time - " + startCommitTime);
    return startCommitTime;
}
From source file:com.uber.hoodie.hadoop.HoodieHiveUtil.java
License:Apache License
public static String readMode(JobContext job, String tableName) { String modePropertyName = String.format(HOODIE_CONSUME_MODE_PATTERN, tableName); String mode = job.getConfiguration().get(modePropertyName, DEFAULT_SCAN_MODE); LOG.info(modePropertyName + ": " + mode); return mode;//from www .ja v a 2 s . c o m }
From source file:com.vertica.hadoop.VerticaOutputFormat.java
License:Apache License
/**
 * {@inheritDoc}
 *
 * <p>Wraps the job's configuration in a {@code VerticaConfiguration} and delegates to the
 * {@code checkOutputSpecs} overload that accepts it.
 */
public void checkOutputSpecs(JobContext context) throws IOException {
    VerticaConfiguration verticaConfig = new VerticaConfiguration(context.getConfiguration());
    checkOutputSpecs(verticaConfig);
}
From source file:com.willetinc.hadoop.mapreduce.dynamodb.DynamoDBQueryInputFormat.java
License:Apache License
/**
 * Computes the input splits for a query.
 *
 * <p>The range key type is read from the job configuration, a splitter is obtained for that
 * type, and the splitter is asked to split using the same configuration.
 *
 * @param job job context supplying the configuration
 * @return the splits produced by the selected splitter
 * @throws IOException declared by the overridden method
 */
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
    final Configuration conf = job.getConfiguration();
    return getSplitter(DynamoDBQueryInputFormat.getRangeKeyType(conf)).split(conf);
}
From source file:com.wipro.ats.bdre.datagen.mr.RangeInputFormat.java
License:Apache License
/**
 * Creates the configured number of splits, dividing the total row count between them.
 *
 * <p>The number of splits is read from {@code Config.NUM_SPLITS_KEY} (default 1). The end of the
 * k-th split is {@code ceil(totalRows * k / numSplits)}, and each split starts where the previous
 * one ended.
 *
 * @param job job context supplying the row count and split count
 * @return one {@code RangeInputSplit} per configured split, in ascending row order
 */
@Override
public List<InputSplit> getSplits(JobContext job) {
    long totalRows = getNumberOfRows(job);
    int numSplits = job.getConfiguration().getInt(Config.NUM_SPLITS_KEY, 1);
    LOG.info("Generating " + totalRows + " using " + numSplits);

    List<InputSplit> result = new ArrayList<InputSplit>();
    long start = 0;
    int index = 0;
    while (index < numSplits) {
        index++;
        // Cumulative row boundary after `index` splits, rounded up.
        long end = (long) Math.ceil(totalRows * (double) index / numSplits);
        result.add(new RangeInputSplit(start, end - start));
        start = end;
    }
    return result;
}
From source file:com.wipro.ats.bdre.datagen.mr.RangeInputFormat.java
License:Apache License
/**
 * Reads the number of rows to generate from the job configuration.
 *
 * @param job job context supplying the configuration
 * @return the value stored under {@code Config.NUM_ROWS_KEY}, or 1 when the lookup falls back to
 *     its default
 */
public long getNumberOfRows(JobContext job) {
    final long rowCount = job.getConfiguration().getLong(Config.NUM_ROWS_KEY, 1);
    return rowCount;
}
From source file:com.yahoo.druid.hadoop.DruidInputFormat.java
License:Apache License
@Override public List<InputSplit> getSplits(JobContext context) throws IOException, InterruptedException { Configuration conf = context.getConfiguration(); final String overlordUrl = conf.get(CONF_DRUID_OVERLORD_HOSTPORT); final String storageDir = conf.get(CONF_DRUID_STORAGE_STORAGE_DIR); String dataSource = conf.get(CONF_DRUID_DATASOURCE); String intervalStr = conf.get(CONF_DRUID_INTERVAL); logger.info("druid overlord url = " + overlordUrl); logger.info("druid storage dir = " + storageDir); logger.info("druid datasource = " + dataSource); logger.info("druid datasource interval = " + intervalStr); //TODO: currently we are creating 1 split per segment which is not really //necessary, we can use some configuration to combine multiple segments into //one input split List<InputSplit> splits = Lists.transform( druid.getSegmentPathsToLoad(dataSource, new Interval(intervalStr), storageDir, overlordUrl), new Function<String, InputSplit>() { @Override/* w w w. j ava 2s .c o m*/ public InputSplit apply(String input) { return new DruidInputSplit(input); } }); logger.info("Number of splits = " + splits.size()); return splits; }
From source file:com.yahoo.ycsb.bulk.hbase.NLineInputFormat.java
License:Apache License
/** * Generate the list of files and make them into FileSplits. *///from w w w. j a va 2s . co m public List<InputSplit> getSplits(JobContext job) throws IOException { ArrayList<InputSplit> splits = new ArrayList<InputSplit>(); for (FileStatus status : listStatus(job)) { Path fileName = status.getPath(); if (status.isDir()) { throw new IOException("Not a file: " + fileName); } this.addFileLines(fileName, splits, job.getConfiguration()); } return splits; }
From source file:com.yahoo.ycsb.bulk.hbase.RangePartitioner.java
License:Apache License
/**
 * Sets the HDFS file name to use, containing a newline-separated list of Base64-encoded split
 * points that represent ranges for partitioning.
 *
 * <p>The file's URI is added to the distributed cache for the job, and the URI's path component
 * is stored in the job configuration under {@code CUTFILE_KEY}.
 *
 * @param job job context whose configuration is updated
 * @param file location of the split-point file
 */
public static void setSplitFile(JobContext job, String file) {
    final URI cutFileUri = new Path(file).toUri();
    final String cutFilePath = cutFileUri.getPath();
    DistributedCache.addCacheFile(cutFileUri, job.getConfiguration());
    job.getConfiguration().set(CUTFILE_KEY, cutFilePath);
}
From source file:com.yahoo.ycsb.bulk.hbase.RangePartitioner.java
License:Apache License
/**
 * Sets the number of random sub-bins per range.
 *
 * @param job job context whose configuration is updated
 * @param num value stored under {@code NUM_SUBBINS}
 */
public static void setNumSubBins(JobContext job, int num) {
    job.getConfiguration()
            .setInt(NUM_SUBBINS, num);
}