List of usage examples for org.apache.hadoop.mapreduce JobID toString
@Override
public String toString()
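For orientation before the examples: toString() renders the job id as "job", the jtIdentifier, and the job number padded to four digits, joined by underscores. A minimal sketch (the class name here is illustrative), assuming the Hadoop MapReduce client library on the classpath:

import org.apache.hadoop.mapreduce.JobID;

public class JobIdToStringDemo {
    public static void main(String[] args) {
        JobID jobId = new JobID("foo", 1); // same constructor as the first example below
        System.out.println(jobId.toString()); // prints: job_foo_0001
    }
}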
From source file:com.google.appengine.tools.mapreduce.AppEngineJobContextTest.java
License:Apache License
public void testGetJobContextFromRequest() {
    JobID jobId = new JobID("foo", 1);
    HttpServletRequest req = createMockMapReduceRequest(jobId);
    replay(req);
    Configuration conf = ConfigurationXmlUtil.getConfigurationFromXml(SIMPLE_CONF_XML);
    persistMRState(jobId, conf);
    JobContext context = new AppEngineJobContext(req);
    assertEquals("/tmp/foo", context.getConfiguration().get("foo.bar"));
    assertEquals(jobId.toString(), context.getJobID().toString());
    verify(req);
}
From source file:com.google.appengine.tools.mapreduce.MapReduceServlet.java
License:Apache License
private Configuration reducerConfigFromMapperConfig(JobID mapperJobID, MapReduceState mapperState) {
    Configuration reducerConfig = ConfigurationXmlUtil
            .getConfigurationFromXml(mapperState.getConfigurationXML());

    // Define input for the reducing "mapper" as a function of our just completed mapping job.
    reducerConfig.set("mapreduce.inputformat.class", IntermediateInputFormat.class.getName());
    reducerConfig.set(IntermediateInputFormat.MAPPER_JOBID_KEY, mapperJobID.toString());
    reducerConfig.set(IntermediateInputFormat.MIN_OUTPUT_KEY, mapperState.getOutputKeyRange().getMinKey());
    reducerConfig.set(IntermediateInputFormat.MAX_OUTPUT_KEY, mapperState.getOutputKeyRange().getMaxKey());

    // Define the "mapper" that handles the reduction process.
    reducerConfig.set("mapreduce.map.class", ReducingMapper.class.getName());

    if (reducerConfig.get(AppEngineJobContext.REDUCER_SHARD_COUNT_KEY) != null) {
        reducerConfig.set(AppEngineJobContext.MAPPER_SHARD_COUNT_KEY,
                reducerConfig.get(AppEngineJobContext.REDUCER_SHARD_COUNT_KEY));
    }
    return reducerConfig;
}
From source file:com.google.appengine.tools.mapreduce.MapReduceState.java
License:Apache License
/**
 * Generates a MapReduceState that's configured with the given parameters, is
 * set as active, and has made no progress as of yet.
 *
 * The MapReduceState needs to have a configuration set via
 * {@code #setConfigurationXML(String)} before it can be persisted.
 *
 * @param service the datastore to persist the MapReduceState to
 * @param name user visible name for this MapReduce
 * @param jobId the JobID this MapReduceState corresponds to
 * @param time start time for this MapReduce, in milliseconds from the epoch
 * @return the initialized MapReduceState
 */
public static MapReduceState generateInitializedMapReduceState(DatastoreService service, String name,
        JobID jobId, long time) {
    MapReduceState state = new MapReduceState(service);
    state.entity = new Entity("MapReduceState", jobId.toString());
    state.setName(name);
    state.entity.setProperty(PROGRESS_PROPERTY, 0.0);
    state.entity.setProperty(STATUS_PROPERTY, "" + Status.ACTIVE);
    state.entity.setProperty(START_TIME_PROPERTY, time);
    state.entity.setUnindexedProperty(CHART_PROPERTY, new Text(""));
    state.setCounters(new Counters());
    state.setActiveShardCount(0);
    state.setShardCount(0);
    return state;
}
From source file:com.google.appengine.tools.mapreduce.MapReduceState.java
License:Apache License
/**
 * Gets the MapReduceState corresponding to the given job ID.
 *
 * @param service the datastore to use for persistence
 * @param jobId the JobID to retrieve the MapReduceState for
 * @return the corresponding MapReduceState
 * @throws EntityNotFoundException if there is no MapReduceState corresponding
 *         to the given JobID
 */
public static MapReduceState getMapReduceStateFromJobID(DatastoreService service, JobID jobId)
        throws EntityNotFoundException {
    Key key = KeyFactory.createKey("MapReduceState", jobId.toString());
    MapReduceState state = new MapReduceState(service);
    state.entity = service.get(key);
    return state;
}
From source file:com.google.appengine.tools.mapreduce.ShardState.java
License:Apache License
/**
 * Gets all shard states corresponding to a particular Job ID.
 */
public static List<ShardState> getShardStatesFromJobID(DatastoreService service, JobID jobId) {
    List<Entity> shardStateEntities = service
            .prepare(new Query("ShardState").addFilter(JOB_ID_PROPERTY, FilterOperator.EQUAL, jobId.toString()))
            .asList(FetchOptions.Builder.withLimit(1000));
    List<ShardState> shardStates = new ArrayList<ShardState>(shardStateEntities.size());
    for (Entity entity : shardStateEntities) {
        ShardState shardState = new ShardState(service);
        shardState.entity = entity;
        shardStates.add(shardState);
    }
    return shardStates;
}
From source file:com.inmobi.conduit.local.CopyMapper.java
License:Apache License
private Path getJobTmpDir(Context context, JobID jobId) {
    return new Path(new Path(context.getConfiguration().get(LOCALSTREAM_TMP_PATH)), jobId.toString());
}
From source file:com.inmobi.databus.local.CopyMapper.java
License:Apache License
private Path getJobTmpDir(Context context, JobID jobId) {
    return new Path(new Path(context.getConfiguration().get("localstream.tmp.path")), jobId.toString());
}
From source file:com.scaleoutsoftware.soss.hserver.hadoop.SubmittedJob.java
License:Apache License
SubmittedJob(JobID jobID, String jobSubmitDirectory, Credentials credentials, Configuration configuration)
        throws IOException, InterruptedException {
    this.jobID = jobID;
    this.configuration = configuration;
    this.jobSubmitDirectoryPath = new Path(jobSubmitDirectory);
    this.fileSystem = FileSystem.get(configuration);

    JobSplit.TaskSplitMetaInfo splitInfo[] = SplitMetaInfoReader.readSplitMetaInfo(jobID, fileSystem,
            configuration, jobSubmitDirectoryPath);

    Path jobSplitFile = JobSubmissionFiles.getJobSplitFile(jobSubmitDirectoryPath);
    FSDataInputStream stream = fileSystem.open(jobSplitFile);
    for (JobSplit.TaskSplitMetaInfo info : splitInfo) {
        Object split = getSplitDetails(stream, info.getStartOffset(), configuration);
        inputSplits.add(split);
        splitLocations.put(split, info.getLocations());
        LOG.info("Adding split for execution. Split = " + split + " Locations: "
                + Arrays.toString(splitLocations.get(split)));
    }
    stream.close();

    jobConfPath = JobSubmissionFiles.getJobConfPath(jobSubmitDirectoryPath);
    if (!fileSystem.exists(jobConfPath)) {
        throw new IOException("Cannot find job.xml. Path = " + jobConfPath);
    }

    // We cannot just use the JobConf(Path) constructor, because it does not
    // work for HDFS locations. The comment in Configuration#loadResource()
    // states, for the case when the Path to the resource is provided:
    // "Can't use FileSystem API or we get an infinite loop since FileSystem
    // uses Configuration API. Use java.io.File instead."
    //
    // Workaround: construct an empty Configuration, provide it with the
    // input stream, and give it to the JobConf constructor.
    FSDataInputStream jobConfStream = fileSystem.open(jobConfPath);
    Configuration jobXML = new Configuration(false);
    jobXML.addResource(jobConfStream);

    // The configuration does not actually get read until we attempt to read
    // some property. The call to #size() makes the Configuration read the
    // input stream.
    jobXML.size();

    // We are done with the input stream and can close it now.
    jobConfStream.close();

    jobConf = new JobConf(jobXML);
    newApi = jobConf.getUseNewMapper();

    jobStatus = new JobStatus(jobID, 0f, 0f, 0f, 0f, JobStatus.State.RUNNING, JobPriority.NORMAL,
            UserGroupInformation.getCurrentUser().getUserName(), jobID.toString(), jobConfPath.toString(), "");
}
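The stream-based workaround in the constructor above can be lifted into a standalone helper. A minimal sketch using only the calls already present in the example (the class and method names here are illustrative):

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.JobConf;

public class JobConfFromHdfs {
    // Load a job.xml from any FileSystem (including HDFS), avoiding the
    // JobConf(Path) constructor, which only works for local files.
    public static JobConf load(FileSystem fs, Path jobConfPath) throws IOException {
        FSDataInputStream in = fs.open(jobConfPath);
        Configuration conf = new Configuration(false); // skip default resources
        conf.addResource(in);
        conf.size(); // Configuration is lazy; force it to parse the stream now
        in.close();
        return new JobConf(conf);
    }
}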
From source file:com.scaleoutsoftware.soss.hserver.JobScheduler.java
License:Apache License
/**
 * Runs the map-reduce job on ScaleOut hServer.
 *
 * @param jobID the id of the job
 * @param jobConf the job to run
 * @param isNewApi if the job uses the new MapReduce APIs
 * @param splitType the type of the split
 * @param inputSplits the list of input splits
 * @param splitLocations the locations of the splits
 * @param grid the invocation grid to run the job
 * @throws IOException if errors occurred during the job
 * @throws InterruptedException if the processing thread is interrupted
 * @throws ClassNotFoundException if the invocation grid does not contain the dependency class
 */
@SuppressWarnings("unchecked")
public void runPredefinedJob(JobID jobID, JobConf jobConf, boolean isNewApi, Class splitType,
        List<?> inputSplits, Map<Object, String[]> splitLocations, InvocationGrid grid)
        throws IOException, InterruptedException, ClassNotFoundException {
    // Initialize user credential in advance
    long time = System.currentTimeMillis();
    CreateUserCredentials.run(grid);
    String hadoopVersion = VersionInfo.getVersion();
    int appID = 0xFFFFFFF & BitConverter.hashStringOneInt(jobID.toString());
    try {
        org.apache.hadoop.mapreduce.OutputCommitter outputCommitter = createOutputCommitter(isNewApi, jobID,
                jobConf);
        HadoopVersionSpecificCode hadoopVersionSpecificCode = HadoopVersionSpecificCode
                .getInstance(hadoopVersion, jobConf);
        org.apache.hadoop.mapred.JobContext jobContext = hadoopVersionSpecificCode.createJobContext(jobConf,
                jobID);
        outputCommitter.setupJob(jobContext);

        // Clear all temporary objects
        DataAccessor.clearObjects(appID);

        // Calculating the partition layout
        com.scaleoutsoftware.soss.client.util.HostToPartitionsMapping hostNameToPartition =
                com.scaleoutsoftware.soss.client.util.HostToPartitionsMapping.getCurrent();
        List<InetAddress> hostAddresses = new ArrayList<InetAddress>(hostNameToPartition.getHosts());

        // Generating mapping of Hadoop partitions to SOSS partitions, so they
        // are equally distributed across hosts
        int numHosts = hostAddresses.size();
        int numberOfSlotsPerNode = Math
                .max(grid != null ? grid.getMaxNumberOfCores() : Runtime.getRuntime().availableProcessors(), 1);

        // Generating split to hostname map
        Map<InetAddress, List<Integer>> splitToHostAddress = assignSplitsToHost(inputSplits, hostAddresses,
                splitLocations);

        int[] partitionMapping = hostNameToPartition.generateEvenItemDistribution(jobConf.getNumReduceTasks());

        HadoopInvocationParameters hadoopParameters = new HadoopInvocationParameters(jobConf, jobID, !isNewApi);
        HServerInvocationParameters parameters = new HServerInvocationParameters(hadoopParameters, appID,
                partitionMapping, hostNameToPartition, numberOfSlotsPerNode, splitType, inputSplits,
                splitToHostAddress, false,
                HServerParameters.getBooleanSetting(HServerParameters.SORT_KEYS, jobConf), hadoopVersion, null,
                SerializationMode.DEFAULT);

        StringBuilder stringBuilder = new StringBuilder();
        stringBuilder.append("Splits created:\n");
        for (InetAddress address : splitToHostAddress.keySet()) {
            stringBuilder.append("Host ");
            stringBuilder.append(address);
            stringBuilder.append(" has ");
            stringBuilder.append(splitToHostAddress.get(address).size());
            stringBuilder.append(" splits.\n");
        }
        System.out.println(stringBuilder.toString());

        System.out.println("Job initialization completed in " + (System.currentTimeMillis() - time) + " ms.");
        time = System.currentTimeMillis();

        InvokeResult<MapperResult> mapInvokeResult = MessagingHelper.invoke(grid,
                RunMapper.MapperInvokable.class, parameters, TimeSpan.INFINITE_TIMEOUT.getSeconds());

        if (mapInvokeResult.getErrors() != null && mapInvokeResult.getErrors().size() > 0) {
            throw new IOException("Map invocation failed.", mapInvokeResult.getErrors().get(0));
        }

        System.out.println("Map invocation done in " + (System.currentTimeMillis() - time) + " ms.");
        time = System.currentTimeMillis();

        MapperResult resultObject = mapInvokeResult.getResult();

        if (resultObject == null || mapInvokeResult.getNumFailed() != 0) {
            throw new IOException("Mapper invocation failed. Num failed = " + mapInvokeResult.getNumFailed());
        }

        if (resultObject.getNumberOfSplitsProcessed() != inputSplits.size()) {
            throw new IOException("Number of splits does not match the number of invocations. Nsplits = "
                    + inputSplits.size() + ", Ninvokes =" + resultObject.getNumberOfSplitsProcessed());
        }

        if (partitionMapping.length > 0) {
            // Running the reduce step
            InvokeResult<Integer> reduceInvokeResult = MessagingHelper.invoke(grid, ReduceInvokable.class,
                    appID, TimeSpan.INFINITE_TIMEOUT.getSeconds());

            System.out.println("Reduce invocation done in " + (System.currentTimeMillis() - time) + " ms.");

            DataAccessor.clearObjects(appID); // Clear all temporary objects

            if (reduceInvokeResult.getErrors() != null && reduceInvokeResult.getErrors().size() > 0) {
                throw new IOException("Reduce invocation failed.", reduceInvokeResult.getErrors().get(0));
            }
            if (reduceInvokeResult.getNumFailed() != 0) {
                throw new IOException("Reduce invocation failed.");
            }
            if (reduceInvokeResult.getResult() != partitionMapping.length) {
                throw new IOException("Not all partitions were reduced. Expected = " + partitionMapping.length
                        + " Actual = " + reduceInvokeResult.getResult());
            }
        }
        outputCommitter.commitJob(jobContext);
    } catch (StateServerException e) {
        throw new IOException("ScaleOut hServer access error.", e);
    }
}
From source file:com.scaleoutsoftware.soss.hserver.JobScheduler.java
License:Apache License
/**
 * Runs the map-reduce job on ScaleOut hServer.
 *
 * @param job the job to run
 * @param jobId the id of the job
 * @param sortEnabled if key sorting is enabled
 * @param jobParameter user defined parameter object for the job
 * @param grid the invocation grid to run the job
 * @throws IOException if errors occurred during the job
 * @throws InterruptedException if the processing thread is interrupted
 * @throws ClassNotFoundException if the invocation grid does not contain the dependency class
 */
@SuppressWarnings("unchecked")
public void runOldApiJob(JobConf job, org.apache.hadoop.mapred.JobID jobId, boolean sortEnabled,
        Object jobParameter, InvocationGrid grid)
        throws IOException, InterruptedException, ClassNotFoundException {
    // Initialize user credential in advance
    int jobAppId = 0xFFFFFFF & BitConverter.hashStringOneInt(jobId.toString());
    String hadoopVersion = VersionInfo.getVersion();
    long time = System.currentTimeMillis();
    CreateUserCredentials.run(grid);
    try {
        // Check output specs before running the job
        job.getOutputFormat().checkOutputSpecs(FileSystem.get(job), job);

        JobContext jContext = HadoopVersionSpecificCode.getInstance(hadoopVersion, job).createJobContext(job,
                jobId);
        org.apache.hadoop.mapred.OutputCommitter outputCommitter = job.getOutputCommitter();
        outputCommitter.setupJob(jContext);

        // Clear all temporary objects
        DataAccessor.clearObjects(jobAppId);

        // Calculating the partition layout
        com.scaleoutsoftware.soss.client.util.HostToPartitionsMapping hostNameToPartition =
                com.scaleoutsoftware.soss.client.util.HostToPartitionsMapping.getCurrent();
        List<InetAddress> hostAddresses = new ArrayList<InetAddress>(hostNameToPartition.getHosts());

        // Generating mapping of Hadoop partitions to SOSS partitions, so they
        // are equally distributed across hosts
        int numHosts = hostAddresses.size();
        int numberOfSlotsPerNode = Math
                .max(grid != null ? grid.getMaxNumberOfCores() : Runtime.getRuntime().availableProcessors(), 1);

        // Set the number of splits to the number of cores
        if (NamedMapInputFormatMapred.class.isAssignableFrom(job.getInputFormat().getClass())) {
            int numberOfSplits = HServerParameters.getSetting(MAP_SPLITS_PER_CORE, job) * numHosts
                    * numberOfSlotsPerNode;
            job.setNumMapTasks(Math.min(numberOfSplits, HServerConstants.MAX_MAP_REDUCE_TASKS));
        }

        // Generating split to hostname map
        org.apache.hadoop.mapred.InputFormat inputFormat = job.getInputFormat();
        List<org.apache.hadoop.mapred.InputSplit> splitList = Arrays
                .asList(inputFormat.getSplits(job, job.getNumMapTasks()));
        Map<InetAddress, List<Integer>> splitToHostAddress = assignSplitsToHost(splitList, hostAddresses, null);

        // Choose the optimal number of reducers for GridOutputFormat
        if (job.getOutputFormat() instanceof NamedMapOutputFormatMapred) {
            job.setNumReduceTasks(numHosts * numberOfSlotsPerNode);
            sortEnabled = false;
        }

        int[] partitionMapping = hostNameToPartition.generateEvenItemDistribution(job.getNumReduceTasks());

        // Generating invocation parameters
        Class<? extends org.apache.hadoop.mapred.InputSplit> splitType = splitList.size() > 0
                ? splitList.get(0).getClass()
                : null;

        HadoopInvocationParameters hadoopParameters = new HadoopInvocationParameters(job, jobId, true);
        HServerInvocationParameters<org.apache.hadoop.mapred.InputSplit> parameters =
                new HServerInvocationParameters<org.apache.hadoop.mapred.InputSplit>(hadoopParameters,
                        jobAppId, partitionMapping, hostNameToPartition, numberOfSlotsPerNode, splitType,
                        splitList, splitToHostAddress, false, sortEnabled, hadoopVersion, jobParameter,
                        SerializationMode.DEFAULT);

        StringBuilder stringBuilder = new StringBuilder();
        stringBuilder.append("Splits created:\n");
        for (InetAddress address : splitToHostAddress.keySet()) {
            stringBuilder.append("Host ");
            stringBuilder.append(address);
            stringBuilder.append(" has ");
            stringBuilder.append(splitToHostAddress.get(address).size());
            stringBuilder.append(" splits.\n");
        }
        System.out.println(stringBuilder.toString());

        System.out.println("Job initialization completed in " + (System.currentTimeMillis() - time) + " ms.");
        time = System.currentTimeMillis();

        InvokeResult<MapperResult> mapInvokeResult = MessagingHelper.invoke(grid,
                RunMapper.MapperInvokable.class, parameters, TimeSpan.INFINITE_TIMEOUT.getSeconds());

        if (mapInvokeResult.getErrors() != null && mapInvokeResult.getErrors().size() > 0) {
            throw new IOException("Map invocation failed.", mapInvokeResult.getErrors().get(0));
        }

        System.out.println("Map invocation done in " + (System.currentTimeMillis() - time) + " ms.");
        time = System.currentTimeMillis();

        MapperResult resultObject = mapInvokeResult.getResult();

        if (resultObject == null || mapInvokeResult.getNumFailed() != 0) {
            throw new IOException("Mapper invocation failed. Num failed = " + mapInvokeResult.getNumFailed());
        }

        if (resultObject.getNumberOfSplitsProcessed() != splitList.size()) {
            throw new IOException("Number of splits does not match the number of invocations. Nsplits = "
                    + splitList.size() + ", Ninvokes =" + resultObject.getNumberOfSplitsProcessed());
        }

        if (partitionMapping.length > 0) {
            // Running the reduce step
            InvokeResult<Integer> reduceInvokeResult = MessagingHelper.invoke(grid, ReduceInvokable.class,
                    jobAppId, TimeSpan.INFINITE_TIMEOUT.getSeconds());

            System.out.println("Reduce invocation done in " + (System.currentTimeMillis() - time) + " ms.");

            DataAccessor.clearObjects(jobAppId); // Clear all temporary objects

            if (reduceInvokeResult.getErrors() != null && reduceInvokeResult.getErrors().size() > 0) {
                throw new IOException("Reduce invocation failed.", reduceInvokeResult.getErrors().get(0));
            }
            if (reduceInvokeResult.getNumFailed() != 0) {
                throw new IOException("Reduce invocation failed.");
            }
            if (reduceInvokeResult.getResult() != partitionMapping.length) {
                throw new IOException("Not all partitions were reduced. Expected = " + partitionMapping.length
                        + " Actual = " + reduceInvokeResult.getResult());
            }
        }
        outputCommitter.commitJob(jContext);
    } catch (StateServerException e) {
        throw new IOException("ScaleOut hServer access error.", e);
    }
}
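A note on the id derivation used by both scheduler methods above: jobID.toString() is hashed and masked with 0xFFFFFFF, yielding a non-negative 28-bit app id that is stable for a given job id string. A hedged analogue (BitConverter.hashStringOneInt is ScaleOut-specific; String.hashCode stands in here):

// Sketch only: masking with 0xFFFFFFF clears the sign bit,
// so the derived app id is always >= 0.
int appId = 0xFFFFFFF & "job_foo_0001".hashCode();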