List of usage examples for org.apache.hadoop.mapreduce JobContext getConfiguration
public Configuration getConfiguration();
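Before the project examples below, a minimal self-contained sketch of the usual pattern: a custom InputFormat pulls its job settings out of the JobContext's Configuration inside getSplits(). The class name SketchInputFormat and the my.example.* property names are hypothetical placeholders, not taken from any of the source files listed here.

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

public class SketchInputFormat extends InputFormat<LongWritable, Text> {
    @Override
    public List<InputSplit> getSplits(JobContext context) throws IOException {
        // JobContext exposes the job's Configuration; read settings from it.
        Configuration conf = context.getConfiguration();
        String source = conf.get("my.example.source"); // hypothetical property
        int shards = conf.getInt("my.example.shards", 4); // hypothetical property with default
        if (source == null) {
            throw new IOException("my.example.source not set in job configuration");
        }
        // ... derive one InputSplit per shard from the configured source ...
        return new ArrayList<InputSplit>();
    }

    @Override
    public RecordReader<LongWritable, Text> createRecordReader(InputSplit split, TaskAttemptContext context) {
        throw new UnsupportedOperationException("sketch only");
    }
}

The same call works anywhere a JobContext (or its TaskAttemptContext subtype) is available, for example in OutputFormat.checkOutputSpecs or an OutputCommitter, as the examples below show.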
From source file:com.gemstone.gemfire.cache.hdfs.internal.hoplog.mapreduce.GFOutputFormat.java
License:Apache License
@Override
public void checkOutputSpecs(JobContext context) throws IOException, InterruptedException {
    Configuration conf = context.getConfiguration();
    validateConfiguration(conf);
}
From source file:com.github.sadikovi.hadoop.riff.RiffOutputCommitter.java
License:Open Source License
@Override
public void commitJob(JobContext jobContext) throws IOException {
    LOG.info("Commit job for path {}", outputPath);
    super.commitJob(jobContext);
    writeMetadataFile(jobContext.getConfiguration(), outputPath);
}
From source file:com.github.seqware.queryengine.plugins.runners.hbasemr.MRHBasePluginRunner.java
License:Open Source License
public static Class transferConfiguration(JobContext context, JobRunParameterInterface inter) {
    Configuration conf = context.getConfiguration();
    String[] strings = conf.getStrings(MRHBasePluginRunner.EXT_PARAMETERS);
    Logger.getLogger(PluginRunnerMapper.class.getName())
            .info("QEMapper configured with: host: "
                    + Constants.Term.HBASE_PROPERTIES.getTermValue(Map.class).toString() + " namespace: "
                    + Constants.Term.NAMESPACE.getTermValue(String.class));
    final String mapParameter = strings[SETTINGS_MAP];
    if (mapParameter != null && !mapParameter.isEmpty()) {
        Map<String, String> settingsMap = (Map<String, String>) ((Object[]) SerializationUtils
                .deserialize(Base64.decodeBase64(mapParameter)))[EXTERNAL_PARAMETERS];
        if (settingsMap != null) {
            Logger.getLogger(FeatureSetCountPlugin.class.getName())
                    .info("Settings map retrieved with " + settingsMap.size() + " entries");
            Constants.setSETTINGS_MAP(settingsMap);
        }
    }
    Logger.getLogger(PluginRunnerMapper.class.getName())
            .info("QEMapper configured with: host: "
                    + Constants.Term.HBASE_PROPERTIES.getTermValue(Map.class).toString() + " namespace: "
                    + Constants.Term.NAMESPACE.getTermValue(String.class));
    final String externalParameters = strings[EXTERNAL_PARAMETERS];
    if (externalParameters != null && !externalParameters.isEmpty()) {
        inter.setExt_parameters(
                (Object[]) SerializationUtils.deserialize(Base64.decodeBase64(externalParameters)));
    }
    final String internalParameters = strings[INTERNAL_PARAMETERS];
    if (internalParameters != null && !internalParameters.isEmpty()) {
        inter.setInt_parameters(
                (Object[]) SerializationUtils.deserialize(Base64.decodeBase64(internalParameters)));
    }
    final String sourceSets = strings[NUM_AND_SOURCE_FEATURE_SETS];
    if (sourceSets != null && !sourceSets.isEmpty()) {
        List<FeatureSet> sSets = convertBase64StrToFeatureSets(sourceSets);
        inter.setSourceSets(sSets);
    }
    final String destSetParameter = strings[DESTINATION_FEATURE_SET];
    if (destSetParameter != null && !destSetParameter.isEmpty()) {
        inter.setDestSet(SWQEFactory.getSerialization().deserialize(Base64.decodeBase64(destSetParameter),
                FeatureSet.class));
    }
    final String pluginParameter = strings[PLUGIN_CLASS];
    if (pluginParameter != null && !pluginParameter.isEmpty()) {
        Object deserialize = SerializationUtils.deserialize(Base64.decodeBase64(pluginParameter));
        Class plugin = (Class) deserialize;
        return plugin;
    }
    throw new RuntimeException("Could not determine plugin to run");
}
From source file:com.google.appengine.tools.mapreduce.AppEngineJobContextTest.java
License:Apache License
public void testGetJobContextFromRequest() {
    JobID jobId = new JobID("foo", 1);
    HttpServletRequest req = createMockMapReduceRequest(jobId);
    replay(req);
    Configuration conf = ConfigurationXmlUtil.getConfigurationFromXml(SIMPLE_CONF_XML);
    persistMRState(jobId, conf);
    JobContext context = new AppEngineJobContext(req);
    assertEquals("/tmp/foo", context.getConfiguration().get("foo.bar"));
    assertEquals(jobId.toString(), context.getJobID().toString());
    verify(req);
}
From source file:com.google.appengine.tools.mapreduce.BlobstoreInputFormat.java
License:Apache License
@Override
public List<InputSplit> getSplits(JobContext context) throws IOException, InterruptedException {
    Configuration configuration = context.getConfiguration();
    String blobKey = configuration.get(BLOB_KEYS);
    int shardCount = configuration.getInt(SHARD_COUNT, DEFAULT_SHARD_COUNT);
    long blobSize = blobKeyToSize.apply(blobKey);
    return getSplits(blobKey, blobSize, shardCount);
}
From source file:com.google.appengine.tools.mapreduce.BlobstoreInputFormatTest.java
License:Apache License
/**
 * Tests that the public {@code getSplits} passes the expected arguments
 * from the {@code JobContext} to the package-private one.
 */
public void test_getSplits() throws Exception {
    String blobKey = "blobKey";
    int shardCount = 3;
    long blobSize = 1024;
    IMocksControl control = EasyMock.createControl();
    @SuppressWarnings("unchecked")
    Function<String, Long> blobKeyToSize = control.createMock(Function.class);
    JobContext jobContext = control.createMock(JobContext.class);
    Configuration configuration = control.createMock(Configuration.class);
    EasyMock.expect(jobContext.getConfiguration()).andReturn(configuration).anyTimes();
    EasyMock.expect(configuration.get(BlobstoreInputFormat.BLOB_KEYS)).andReturn(blobKey);
    EasyMock.expect(
            configuration.getInt(BlobstoreInputFormat.SHARD_COUNT, BlobstoreInputFormat.DEFAULT_SHARD_COUNT))
            .andReturn(shardCount);
    // this is what we are testing
    EasyMock.expect(blobKeyToSize.apply(blobKey)).andReturn(blobSize);
    control.replay();
    BlobstoreInputFormat inputFormat = new BlobstoreInputFormat();
    inputFormat.setBlobKeyToSize(blobKeyToSize);
    assertSplits(inputFormat.getSplits(jobContext), blobKey, blobSize, shardCount);
    control.verify();
}
From source file:com.google.appengine.tools.mapreduce.DatastoreInputFormat.java
License:Apache License
/**
 * Generates a set of InputSplits partitioning a particular entity kind in
 * the datastore. The context's configuration must define a value for the
 * {@value #ENTITY_KIND_KEY} attribute, which will be the entity kind
 * partitioned, as well as a value for the {@value #SHARD_COUNT_KEY}
 * attribute, which will be the maximum number of shards to split into.
 */
public List<InputSplit> getSplits(JobContext context) throws IOException {
    String entityKind = context.getConfiguration().get(ENTITY_KIND_KEY);
    if (entityKind == null) {
        throw new IOException("No entity kind specified in job.");
    }
    log.info("Getting input splits for: " + entityKind);
    DatastoreService datastoreService = DatastoreServiceFactory.getDatastoreService();
    Key startKey = getStartKey(entityKind, datastoreService);
    if (startKey == null) {
        return new ArrayList<InputSplit>();
    }
    int shardCount = context.getConfiguration().getInt(SHARD_COUNT_KEY, DEFAULT_SHARD_COUNT);
    int desiredScatterResultCount = shardCount * SCATTER_OVERSAMPLE_FACTOR;
    // NB(frew): If scatter doesn't exist (as in the 1.4.0 dev_appserver)
    // then we'll just end up with one split. This seems reasonable.
    Query scatter = new Query(entityKind).addSort(SCATTER_RESERVED_PROPERTY).setKeysOnly();
    List<Entity> scatterList = datastoreService.prepare(scatter).asList(withLimit(desiredScatterResultCount));
    Collections.sort(scatterList, new Comparator<Entity>() {
        public int compare(Entity e1, Entity e2) {
            return e1.getKey().compareTo(e2.getKey());
        }
    });
    List<Key> splitKeys = new ArrayList<Key>(shardCount);
    // Possibly use a lower oversampling factor if there aren't enough
    // scatter-property-containing entities to fill out the list.
    int usedOversampleFactor = Math.max(1, scatterList.size() / shardCount);
    log.info("Requested " + desiredScatterResultCount + " scatter entities. Got " + scatterList.size()
            + " so using oversample factor " + usedOversampleFactor);
    // We expect the points to be uniformly randomly distributed. So we
    // act like the first point is the start key (which we already know)
    // and omit it. This converges on correct as the number of samples
    // goes to infinity.
    for (int i = 1; i < shardCount; i++) {
        // This can happen if we don't have as many scatter properties as we want.
        if (i * usedOversampleFactor >= scatterList.size()) {
            break;
        }
        splitKeys.add(scatterList.get(i * usedOversampleFactor).getKey());
    }
    return getSplitsFromSplitPoints(startKey, splitKeys);
}
From source file:com.google.appengine.tools.mapreduce.RangeInputFormat.java
License:Apache License
@Override
public List<InputSplit> getSplits(JobContext context) throws IOException, InterruptedException {
    Configuration conf = context.getConfiguration();
    long shardCount = conf.getInt(SHARD_COUNT_KEY, DEFAULT_SHARD_COUNT);
    long rangeStart = getNonNegativeLong(conf, RANGE_START_KEY);
    long rangeEnd = getNonNegativeLong(conf, RANGE_END_KEY);
    if (rangeStart >= rangeEnd) {
        throw new InvalidConfigurationException(
                "Invalid range. Start: " + rangeStart + " >= end: " + rangeEnd);
    }
    double increment = ((double) rangeEnd - rangeStart) / shardCount;
    ArrayList<InputSplit> splits = new ArrayList<InputSplit>();
    for (int i = 0; i < shardCount - 1; i++) {
        splits.add(new RangeInputSplit(rangeStart + Math.round(i * increment),
                rangeStart + Math.round((i + 1) * increment)));
    }
    // Make sure that the final split hits the end.
    splits.add(new RangeInputSplit(rangeStart + Math.round((shardCount - 1) * increment), rangeEnd));
    return splits;
}
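For example, with rangeStart = 0, rangeEnd = 10 and shardCount = 3 (values chosen here purely for illustration), increment is 10/3 ≈ 3.33 and the rounding yields the splits [0, 3), [3, 7) and [7, 10), so the whole range is covered with no gaps or overlaps.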
From source file:com.hadoop.mapreduce.LzoTextInputFormat.java
License:Open Source License
@Override
protected List<FileStatus> listStatus(JobContext job) throws IOException {
    List<FileStatus> files = super.listStatus(job);
    FileSystem fs = FileSystem.get(job.getConfiguration());
    String fileExtension = new LzopCodec().getDefaultExtension();
    for (Iterator<FileStatus> iterator = files.iterator(); iterator.hasNext();) {
        FileStatus fileStatus = iterator.next();
        Path file = fileStatus.getPath();
        if (!file.toString().endsWith(fileExtension)) {
            // get rid of non-lzo files
            iterator.remove();
        } else {
            // read the index file
            LzoIndex index = readIndex(file, fs);
            indexes.put(file, index);
        }
    }
    return files;
}
From source file:com.hadoop.mapreduce.LzoTextInputFormat.java
License:Open Source License
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
    List<InputSplit> splits = super.getSplits(job);
    // find new start/ends of the filesplit that aligns
    // with the lzo blocks
    List<InputSplit> result = new ArrayList<InputSplit>();
    FileSystem fs = FileSystem.get(job.getConfiguration());
    for (InputSplit genericSplit : splits) {
        // load the index
        FileSplit fileSplit = (FileSplit) genericSplit;
        Path file = fileSplit.getPath();
        LzoIndex index = indexes.get(file);
        if (index == null) {
            throw new IOException("Index not found for " + file);
        }
        if (index.isEmpty()) {
            // empty index, keep as is
            result.add(fileSplit);
            continue;
        }
        long start = fileSplit.getStart();
        long end = start + fileSplit.getLength();
        if (start != 0) {
            // find the next block position from the start of the split
            long newStart = index.findNextPosition(start);
            if (newStart == -1 || newStart >= end) {
                // just skip this since it will be handled by another split
                continue;
            }
            start = newStart;
        }
        long newEnd = index.findNextPosition(end);
        if (newEnd != -1) {
            end = newEnd;
        } else {
            // didn't find the next position; we have hit the end of the file
            end = fs.getFileStatus(file).getLen();
        }
        result.add(new FileSplit(file, start, end - start, fileSplit.getLocations()));
    }
    return result;
}