Example usage for org.apache.hadoop.mapreduce JobID toString

List of usage examples for org.apache.hadoop.mapreduce JobID toString


On this page you can find example usages of org.apache.hadoop.mapreduce JobID toString.


    public String toString() 

Source Link


From source file:com.google.appengine.tools.mapreduce.AppEngineJobContextTest.java

License:Apache License

/**
 * Checks that an {@code AppEngineJobContext} constructed from a mock request
 * reports {@code "/tmp/foo"} for the {@code "foo.bar"} configuration key and a
 * job ID whose string form matches the one the request was created with.
 */
public void testGetJobContextFromRequest() {
    JobID jobId = new JobID("foo", 1);
    HttpServletRequest req = createMockMapReduceRequest(jobId);
    replay(req);

    // Parse the sample XML and hand it to persistMRState together with the job ID.
    Configuration conf = ConfigurationXmlUtil.getConfigurationFromXml(SIMPLE_CONF_XML);
    persistMRState(jobId, conf);

    JobContext context = new AppEngineJobContext(req);
    assertEquals("/tmp/foo", context.getConfiguration().get("foo.bar"));
    // Job IDs are compared through their string forms.
    assertEquals(jobId.toString(), context.getJobID().toString());

From source file:com.google.appengine.tools.mapreduce.MapReduceServlet.java

License:Apache License

/**
 * Builds the configuration for the reduce phase, which is run as a second
 * "mapper" job: its input format is {@code IntermediateInputFormat}, keyed by
 * the finished mapper job's ID and output key range, and its map class is
 * {@code ReducingMapper}.
 *
 * NOTE(review): in this excerpt the initializer of {@code reducerConfig} is cut
 * off after {@code ConfigurationXmlUtil}, and the body of the shard-count
 * {@code if} is empty; confirm both against the original file.
 */
private Configuration reducerConfigFromMapperConfig(JobID mapperJobID, MapReduceState mapperState) {

    Configuration reducerConfig = ConfigurationXmlUtil

    // define input for the reducing "mapper" as a function of our just completed mapping job
    reducerConfig.set("mapreduce.inputformat.class", IntermediateInputFormat.class.getName());
    // The mapper job is identified by the string form of its JobID.
    reducerConfig.set(IntermediateInputFormat.MAPPER_JOBID_KEY, mapperJobID.toString());
    reducerConfig.set(IntermediateInputFormat.MIN_OUTPUT_KEY, mapperState.getOutputKeyRange().getMinKey());
    reducerConfig.set(IntermediateInputFormat.MAX_OUTPUT_KEY, mapperState.getOutputKeyRange().getMaxKey());

    // define the "mapper" that handles the reduction process
    reducerConfig.set("mapreduce.map.class", ReducingMapper.class.getName());

    if (reducerConfig.get(AppEngineJobContext.REDUCER_SHARD_COUNT_KEY) != null) {
    }
    return reducerConfig;

From source file:com.google.appengine.tools.mapreduce.MapReduceState.java

License:Apache License

 * Generates a MapReduceState that's configured with the given parameters, is
 * set as active, and has made no progress as of yet.
 *
 * The MapReduceState needs to have a configuration set via
 * {@code #setConfigurationXML(String)} before it can be persisted.
 * @param service the datastore to persist the MapReduceState to
 * @param name user visible name for this MapReduce
 * @param jobId the JobID this MapReduceState corresponds to
 * @param time start time for this MapReduce, in milliseconds from the epoch
 * @return the initialized MapReduceState
public static MapReduceState generateInitializedMapReduceState(DatastoreService service, String name,
        JobID jobId, long time) {
    MapReduceState state = new MapReduceState(service);
    // The entity is of kind "MapReduceState" and keyed by the job ID's string form.
    state.entity = new Entity("MapReduceState", jobId.toString());
    state.entity.setProperty(PROGRESS_PROPERTY, 0.0);
    // The status is stored as its string representation.
    state.entity.setProperty(STATUS_PROPERTY, "" + Status.ACTIVE);
    state.entity.setProperty(START_TIME_PROPERTY, time);
    // The chart starts out as empty text and is stored as an unindexed property.
    state.entity.setUnindexedProperty(CHART_PROPERTY, new Text(""));
    state.setCounters(new Counters());
    // NOTE(review): the name parameter is not used in the lines visible here - confirm against the original file.
    return state;

From source file:com.google.appengine.tools.mapreduce.MapReduceState.java

License:Apache License

 * Gets the MapReduceState corresponding to the given job ID.
 *
 * @param service the datastore to use for persistence
 * @param jobId the JobID to retrieve the MapReduceState for
 * @return the corresponding MapReduceState
 * @throws EntityNotFoundException if there is no MapReduceState corresponding
 * to the given JobID
public static MapReduceState getMapReduceStateFromJobID(DatastoreService service, JobID jobId)
        throws EntityNotFoundException {
    // Build the lookup key from the kind "MapReduceState" and the job ID's string form.
    Key key = KeyFactory.createKey("MapReduceState", jobId.toString());
    MapReduceState state = new MapReduceState(service);
    // The datastore lookup is the source of the declared EntityNotFoundException.
    state.entity = service.get(key);
    return state;

From source file:com.google.appengine.tools.mapreduce.ShardState.java

License:Apache License

 * Gets all shard states corresponding to a particular Job ID
 */
public static List<ShardState> getShardStatesFromJobID(DatastoreService service, JobID jobId) {
    // Query "ShardState" entities whose job-id property equals the job ID's string form.
    // NOTE(review): the call chain below has no terminating call or semicolon in this excerpt.
    List<Entity> shardStateEntities = service
            .prepare(new Query("ShardState").addFilter(JOB_ID_PROPERTY, FilterOperator.EQUAL, jobId.toString()))
    // Pre-size the result list to the number of matching entities.
    List<ShardState> shardStates = new ArrayList<ShardState>(shardStateEntities.size());
    for (Entity entity : shardStateEntities) {
        // Wrap each datastore entity in a ShardState bound to the same service.
        ShardState shardState = new ShardState(service);
        shardState.entity = entity;
        // NOTE(review): no statement adding shardState to shardStates is visible here - confirm against the original file.
    return shardStates;

From source file:com.inmobi.conduit.local.CopyMapper.java

License:Apache License

/**
 * Returns the temporary directory for a job: a child of the path configured
 * under {@code LOCALSTREAM_TMP_PATH}, named after the job ID's string form.
 *
 * @param context supplies the configuration the base path is read from
 * @param jobId the job whose string form becomes the child path component
 * @return the base temporary path with the job ID appended as a child
 */
private Path getJobTmpDir(Context context, JobID jobId) {
    return new Path(new Path(context.getConfiguration().get(LOCALSTREAM_TMP_PATH)), jobId.toString());

From source file:com.inmobi.databus.local.CopyMapper.java

License:Apache License

/**
 * Returns the temporary directory for a job: a child of the path configured
 * under the {@code "localstream.tmp.path"} key, named after the job ID's
 * string form.
 *
 * @param context supplies the configuration the base path is read from
 * @param jobId the job whose string form becomes the child path component
 * @return the base temporary path with the job ID appended as a child
 */
private Path getJobTmpDir(Context context, JobID jobId) {
    return new Path(new Path(context.getConfiguration().get("localstream.tmp.path")), jobId.toString());

From source file:com.scaleoutsoftware.soss.hserver.hadoop.SubmittedJob.java

License:Apache License

/**
 * Loads a submitted job from its submit directory: reads the split meta info
 * and the split details, locates job.xml, builds a {@code JobConf} from it and
 * creates a {@code JobStatus} in the RUNNING state.
 *
 * NOTE(review): several lines appear to be missing from this excerpt (closing
 * braces, and the statements that load and close {@code jobConfStream}); the
 * {@code credentials} parameter is not used in the lines visible here.
 *
 * @throws IOException if job.xml does not exist at the expected path
 */
SubmittedJob(JobID jobID, String jobSubmitDirectory, Credentials credentials, Configuration configuration)
        throws IOException, InterruptedException {
    this.jobID = jobID;
    this.configuration = configuration;
    this.jobSubmitDirectoryPath = new Path(jobSubmitDirectory);
    this.fileSystem = FileSystem.get(configuration);

    JobSplit.TaskSplitMetaInfo splitInfo[] = SplitMetaInfoReader.readSplitMetaInfo(jobID, fileSystem,
            configuration, jobSubmitDirectoryPath);

    Path jobSplitFile = JobSubmissionFiles.getJobSplitFile(jobSubmitDirectoryPath);
    FSDataInputStream stream = fileSystem.open(jobSplitFile);

    // Read each split from the split file at the offset recorded in its meta info,
    // and remember the locations reported for it.
    for (JobSplit.TaskSplitMetaInfo info : splitInfo) {
        Object split = getSplitDetails(stream, info.getStartOffset(), configuration);
        inputSplits.add(split);
        splitLocations.put(split, info.getLocations());
        LOG.info("Adding split for execution. Split = " + split + " Locations: "
                + Arrays.toString(splitLocations.get(split)));


    jobConfPath = JobSubmissionFiles.getJobConfPath(jobSubmitDirectoryPath);

    if (!fileSystem.exists(jobConfPath)) {
        throw new IOException("Cannot find job.xml. Path = " + jobConfPath);

    //We cannot just use JobConf(Path) constructor,
    //because it does not work for HDFS locations.
    //The comment in Configuration#loadResource() states,
    //for the case when the Path to the resource is provided:
    //"Can't use FileSystem API or we get an infinite loop
    //since FileSystem uses Configuration API.  Use java.io.File instead."
    //Workaround: construct empty Configuration, provide it with
    //input stream and give it to JobConf constructor.
    FSDataInputStream jobConfStream = fileSystem.open(jobConfPath);
    Configuration jobXML = new Configuration(false);

    //The configuration is not actually read until we attempt to
    //read some property. A call to #size() will make Configuration
    //read the input stream.
    //NOTE(review): the statement this comment describes is not visible in this excerpt.

    //We are done with input stream, can close it now.
    //NOTE(review): no close() call is visible in this excerpt - confirm against the original file.

    jobConf = new JobConf(jobXML);

    newApi = jobConf.getUseNewMapper();

    // All four progress values start at 0; the job ID's string form and the job.xml
    // path are passed as string arguments (presumably job name and job file - confirm).
    jobStatus = new JobStatus(jobID, 0f, 0f, 0f, 0f, JobStatus.State.RUNNING, JobPriority.NORMAL,
            UserGroupInformation.getCurrentUser().getUserName(), jobID.toString(), jobConfPath.toString(), "");

From source file:com.scaleoutsoftware.soss.hserver.JobScheduler.java

License:Apache License

 * Runs the map-reduce job on ScaleOut hServer.
 * @param jobID          the id of the job
 * @param jobConf        the job to run
 * @param isNewApi       if the job uses the new MapReduce APIs
 * @param splitType      the type of the split
 * @param inputSplits    the list of input splits
 * @param splitLocations the locations of the splits
 * @param grid           the invocation grid to run the job
 * @throws IOException            if errors occurred during the job
 * @throws InterruptedException   if the processing thread is interrupted
 * @throws ClassNotFoundException if the invocation grid does not contain the dependency class
public void runPredefinedJob(JobID jobID, JobConf jobConf, boolean isNewApi, Class splitType,
        List<?> inputSplits, Map<Object, String[]> splitLocations, InvocationGrid grid)
        throws IOException, InterruptedException, ClassNotFoundException {
    // NOTE(review): this excerpt is missing lines - several statements end mid-expression
    // and closing braces are absent. Confirm details against the original file.

    //Initialize user credential in advance
    long time = System.currentTimeMillis();
    String hadoopVersion = VersionInfo.getVersion();

    // Derive the application ID from the job ID's string form; the mask keeps only
    // the low 28 bits of the hash, so the result is never negative.
    int appID = 0xFFFFFFF & BitConverter.hashStringOneInt(jobID.toString());

    try {

        org.apache.hadoop.mapreduce.OutputCommitter outputCommitter = createOutputCommitter(isNewApi, jobID,

        HadoopVersionSpecificCode hadoopVersionSpecificCode = HadoopVersionSpecificCode
                .getInstance(hadoopVersion, jobConf);

        org.apache.hadoop.mapred.JobContext jobContext = hadoopVersionSpecificCode.createJobContext(jobConf,

        //clear all temporary objects

        //Calculating the partition layout
        com.scaleoutsoftware.soss.client.util.HostToPartitionsMapping hostNameToPartition = com.scaleoutsoftware.soss.client.util.HostToPartitionsMapping
        List<InetAddress> hostAddresses = new ArrayList<InetAddress>(hostNameToPartition.getHosts());

        //Generating mapping of Hadoop partitions to SOSS partitions, so they are equally distributed across hosts
        int numHosts = hostAddresses.size();
        // Use the grid's core count when a grid is given, otherwise this JVM's processor count; never less than 1.
        int numberOfSlotsPerNode = Math
                .max(grid != null ? grid.getMaxNumberOfCores() : Runtime.getRuntime().availableProcessors(), 1);

        //Generating split to hostname map
        Map<InetAddress, List<Integer>> splitToHostAddress = assignSplitsToHost(inputSplits, hostAddresses,

        int[] partitionMapping = hostNameToPartition.generateEvenItemDistribution(jobConf.getNumReduceTasks());

        HadoopInvocationParameters hadoopParameters = new HadoopInvocationParameters(jobConf, jobID, !isNewApi);
        HServerInvocationParameters parameters = new HServerInvocationParameters(hadoopParameters, appID,
                partitionMapping, hostNameToPartition, numberOfSlotsPerNode, splitType, inputSplits,
                splitToHostAddress, false,
                HServerParameters.getBooleanSetting(HServerParameters.SORT_KEYS, jobConf), hadoopVersion, null,

        StringBuilder stringBuilder = new StringBuilder();
        stringBuilder.append("Splits created:\n");
        for (InetAddress address : splitToHostAddress.keySet()) {
            stringBuilder.append("Host ");
            stringBuilder.append(" has ");
            stringBuilder.append(" splits.\n");

        System.out.println("Job initialization completed in " + (System.currentTimeMillis() - time) + " ms.");

        // Restart the timer so the next message reports only the map phase.
        time = System.currentTimeMillis();

        InvokeResult<MapperResult> mapInvokeResult = MessagingHelper.invoke(grid,
                RunMapper.MapperInvokable.class, parameters, TimeSpan.INFINITE_TIMEOUT.getSeconds());

        // Surface the first reported error as the cause of the IOException.
        if (mapInvokeResult.getErrors() != null && mapInvokeResult.getErrors().size() > 0) {
            throw new IOException("Map invocation failed.", mapInvokeResult.getErrors().get(0));

        System.out.println("Map invocation done in " + (System.currentTimeMillis() - time) + " ms.");
        time = System.currentTimeMillis();

        MapperResult resultObject = mapInvokeResult.getResult();

        if (resultObject == null || mapInvokeResult.getNumFailed() != 0) {
            throw new IOException("Mapper invocation failed. Num failed = " + mapInvokeResult.getNumFailed());

        // Every input split must have been processed.
        if (resultObject.getNumberOfSplitsProcessed() != inputSplits.size()) {
            throw new IOException("Number of splits does not match the number of invocations. Nsplits = "
                    + inputSplits.size() + ", Ninvokes =" + resultObject.getNumberOfSplitsProcessed());

        // The reduce step only runs when the partition mapping is non-empty.
        if (partitionMapping.length > 0) {
            //Running the reduce step
            InvokeResult<Integer> reduceInvokeResult = MessagingHelper.invoke(grid, ReduceInvokable.class,
                    appID, TimeSpan.INFINITE_TIMEOUT.getSeconds());

            System.out.println("Reduce invocation done in " + (System.currentTimeMillis() - time) + " ms.");

            DataAccessor.clearObjects(appID); //clear all temporary objects

            if (reduceInvokeResult.getErrors() != null && reduceInvokeResult.getErrors().size() > 0) {
                throw new IOException("Reduce invocation failed.", reduceInvokeResult.getErrors().get(0));
            if (reduceInvokeResult.getNumFailed() != 0) {
                throw new IOException("Reduce invocation failed.");
            // The reduce result is compared against the number of partitions.
            if (reduceInvokeResult.getResult() != partitionMapping.length) {
                throw new IOException("Not all partitions were reduced. Expected = " + partitionMapping.length
                        + " Actual = " + reduceInvokeResult.getResult());
    } catch (StateServerException e) {
        // Wrap the state server failure, keeping the original exception as the cause.
        throw new IOException("ScaleOut hServer access error.", e);


From source file:com.scaleoutsoftware.soss.hserver.JobScheduler.java

License:Apache License

 * Runs the map-reduce job on ScaleOut hServer.
 * @param job          the job to run
 * @param jobId        the id of the job
 * @param sortEnabled  if key sorting is enabled
 * @param jobParameter user defined parameter object for the job
 * @param grid         the invocation grid to run the job
 * @throws IOException            if errors occurred during the job
 * @throws InterruptedException   if the processing thread is interrupted
 * @throws ClassNotFoundException if the invocation grid does not contain the dependency class
public void runOldApiJob(JobConf job, org.apache.hadoop.mapred.JobID jobId, boolean sortEnabled,
        Object jobParameter, InvocationGrid grid)
        throws IOException, InterruptedException, ClassNotFoundException {
    // NOTE(review): this excerpt is missing lines - several statements end mid-expression
    // and closing braces are absent. Confirm details against the original file.
    //Initialize user credential in advance
    // Derive the application ID from the job ID's string form; the mask keeps only
    // the low 28 bits of the hash, so the result is never negative.
    int jobAppId = 0xFFFFFFF & BitConverter.hashStringOneInt(jobId.toString());
    String hadoopVersion = VersionInfo.getVersion();
    long time = System.currentTimeMillis();

    try {
        //Check output specs before running the job
        job.getOutputFormat().checkOutputSpecs(FileSystem.get(job), job);

        JobContext jContext = HadoopVersionSpecificCode.getInstance(hadoopVersion, job).createJobContext(job,

        org.apache.hadoop.mapred.OutputCommitter outputCommitter = job.getOutputCommitter();

        //clear all temporary objects

        //Calculating the partition layout
        com.scaleoutsoftware.soss.client.util.HostToPartitionsMapping hostNameToPartition = com.scaleoutsoftware.soss.client.util.HostToPartitionsMapping
        List<InetAddress> hostAddresses = new ArrayList<InetAddress>(hostNameToPartition.getHosts());

        //Generating mapping of Hadoop partitions to SOSS partitions, so they are equally distributed across hosts
        int numHosts = hostAddresses.size();
        // Use the grid's core count when a grid is given, otherwise this JVM's processor count; never less than 1.
        int numberOfSlotsPerNode = Math
                .max(grid != null ? grid.getMaxNumberOfCores() : Runtime.getRuntime().availableProcessors(), 1);

        //Set the number of splits to the number of cores
        if (NamedMapInputFormatMapred.class.isAssignableFrom(job.getInputFormat().getClass())) {
            int numberOfSplits = HServerParameters.getSetting(MAP_SPLITS_PER_CORE, job) * numHosts
                    * numberOfSlotsPerNode;
            // Cap the requested map task count at MAX_MAP_REDUCE_TASKS.
            job.setNumMapTasks(Math.min(numberOfSplits, HServerConstants.MAX_MAP_REDUCE_TASKS));

        //Generating split to hostname map
        org.apache.hadoop.mapred.InputFormat inputFormat = job.getInputFormat();
        List<org.apache.hadoop.mapred.InputSplit> splitList = Arrays
                .asList(inputFormat.getSplits(job, job.getNumMapTasks()));
        Map<InetAddress, List<Integer>> splitToHostAddress = assignSplitsToHost(splitList, hostAddresses, null);

        //Choose the optimal number of reducers for GridOutputFormat
        if (job.getOutputFormat() instanceof NamedMapOutputFormatMapred) {
            job.setNumReduceTasks(numHosts * numberOfSlotsPerNode);
            // Key sorting is switched off for this output format, overriding the caller's flag.
            sortEnabled = false;

        int[] partitionMapping = hostNameToPartition.generateEvenItemDistribution(job.getNumReduceTasks());

        //Generating invocation parameters
        // The split type is taken from the first split; it is null when there are no splits.
        Class<? extends org.apache.hadoop.mapred.InputSplit> splitType = splitList.size() > 0
                ? splitList.get(0).getClass()
                : null;

        HadoopInvocationParameters hadoopParameters = new HadoopInvocationParameters(job, jobId, true);

        HServerInvocationParameters<org.apache.hadoop.mapred.InputSplit> parameters = new HServerInvocationParameters<org.apache.hadoop.mapred.InputSplit>(
                hadoopParameters, jobAppId, partitionMapping, hostNameToPartition, numberOfSlotsPerNode,
                splitType, splitList, splitToHostAddress, false, sortEnabled, hadoopVersion, jobParameter,

        StringBuilder stringBuilder = new StringBuilder();
        stringBuilder.append("Splits created:\n");
        for (InetAddress address : splitToHostAddress.keySet()) {
            stringBuilder.append("Host ");
            stringBuilder.append(" has ");
            stringBuilder.append(" splits.\n");

        System.out.println("Job initialization completed in " + (System.currentTimeMillis() - time) + " ms.");
        // Restart the timer so the next message reports only the map phase.
        time = System.currentTimeMillis();

        InvokeResult<MapperResult> mapInvokeResult = MessagingHelper.invoke(grid,
                RunMapper.MapperInvokable.class, parameters, TimeSpan.INFINITE_TIMEOUT.getSeconds());

        // Surface the first reported error as the cause of the IOException.
        if (mapInvokeResult.getErrors() != null && mapInvokeResult.getErrors().size() > 0) {
            throw new IOException("Map invocation failed.", mapInvokeResult.getErrors().get(0));

        System.out.println("Map invocation done in " + (System.currentTimeMillis() - time) + " ms.");
        time = System.currentTimeMillis();

        MapperResult resultObject = mapInvokeResult.getResult();

        if (resultObject == null || mapInvokeResult.getNumFailed() != 0) {
            throw new IOException("Mapper invocation failed. Num failed = " + mapInvokeResult.getNumFailed());

        // Every generated split must have been processed.
        if (resultObject.getNumberOfSplitsProcessed() != splitList.size()) {
            throw new IOException("Number of splits does not match the number of invocations. Nsplits = "
                    + splitList.size() + ", Ninvokes =" + resultObject.getNumberOfSplitsProcessed());

        // The reduce step only runs when the partition mapping is non-empty.
        if (partitionMapping.length > 0) {
            //Running the reduce step
            InvokeResult<Integer> reduceInvokeResult = MessagingHelper.invoke(grid, ReduceInvokable.class,
                    jobAppId, TimeSpan.INFINITE_TIMEOUT.getSeconds());

            System.out.println("Reduce invocation done in " + (System.currentTimeMillis() - time) + " ms.");

            DataAccessor.clearObjects(jobAppId); //clear all temporary objects

            if (reduceInvokeResult.getErrors() != null && reduceInvokeResult.getErrors().size() > 0) {
                throw new IOException("Reduce invocation failed.", reduceInvokeResult.getErrors().get(0));
            if (reduceInvokeResult.getNumFailed() != 0) {
                throw new IOException("Reduce invocation failed.");
            // The reduce result is compared against the number of partitions.
            if (reduceInvokeResult.getResult() != partitionMapping.length) {
                throw new IOException("Not all partitions were reduced. Expected = " + partitionMapping.length
                        + " Actual = " + reduceInvokeResult.getResult());
    } catch (StateServerException e) {
        // Wrap the state server failure, keeping the original exception as the cause.
        throw new IOException("ScaleOut hServer access error.", e);
