List of usage examples for org.apache.hadoop.mapreduce JobID toString
@Override
public String toString()
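For orientation before the examples: toString() renders the job id as "job", the jtIdentifier, and the job number padded to four digits, joined by underscores. A minimal sketch (the class name here is illustrative), assuming the Hadoop MapReduce client library on the classpath:

import org.apache.hadoop.mapreduce.JobID;

public class JobIdToStringDemo {
    public static void main(String[] args) {
        JobID jobId = new JobID("foo", 1); // same constructor as the first example below
        System.out.println(jobId.toString()); // prints: job_foo_0001
    }
}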
From source file:com.google.appengine.tools.mapreduce.AppEngineJobContextTest.java
License:Apache License
public void testGetJobContextFromRequest() {
    JobID jobId = new JobID("foo", 1);
    HttpServletRequest req = createMockMapReduceRequest(jobId);
    replay(req);
    Configuration conf = ConfigurationXmlUtil.getConfigurationFromXml(SIMPLE_CONF_XML);
    persistMRState(jobId, conf);
    JobContext context = new AppEngineJobContext(req);
    assertEquals("/tmp/foo", context.getConfiguration().get("foo.bar"));
    assertEquals(jobId.toString(), context.getJobID().toString());
    verify(req);
}
From source file:com.google.appengine.tools.mapreduce.MapReduceServlet.java
License:Apache License
private Configuration reducerConfigFromMapperConfig(JobID mapperJobID, MapReduceState mapperState) {
    Configuration reducerConfig = ConfigurationXmlUtil
            .getConfigurationFromXml(mapperState.getConfigurationXML());

    // Define input for the reducing "mapper" as a function of our just completed mapping job.
    reducerConfig.set("mapreduce.inputformat.class", IntermediateInputFormat.class.getName());
    reducerConfig.set(IntermediateInputFormat.MAPPER_JOBID_KEY, mapperJobID.toString());
    reducerConfig.set(IntermediateInputFormat.MIN_OUTPUT_KEY, mapperState.getOutputKeyRange().getMinKey());
    reducerConfig.set(IntermediateInputFormat.MAX_OUTPUT_KEY, mapperState.getOutputKeyRange().getMaxKey());

    // Define the "mapper" that handles the reduction process.
    reducerConfig.set("mapreduce.map.class", ReducingMapper.class.getName());

    if (reducerConfig.get(AppEngineJobContext.REDUCER_SHARD_COUNT_KEY) != null) {
        reducerConfig.set(AppEngineJobContext.MAPPER_SHARD_COUNT_KEY,
                reducerConfig.get(AppEngineJobContext.REDUCER_SHARD_COUNT_KEY));
    }
    return reducerConfig;
}
From source file:com.google.appengine.tools.mapreduce.MapReduceState.java
License:Apache License
/**
 * Generates a MapReduceState that's configured with the given parameters, is
 * set as active, and has made no progress as of yet.
 *
 * The MapReduceState needs to have a configuration set via
 * {@code #setConfigurationXML(String)} before it can be persisted.
 *
 * @param service the datastore to persist the MapReduceState to
 * @param name user visible name for this MapReduce
 * @param jobId the JobID this MapReduceState corresponds to
 * @param time start time for this MapReduce, in milliseconds from the epoch
 * @return the initialized MapReduceState
 */
public static MapReduceState generateInitializedMapReduceState(DatastoreService service, String name,
        JobID jobId, long time) {
    MapReduceState state = new MapReduceState(service);
    state.entity = new Entity("MapReduceState", jobId.toString());
    state.setName(name);
    state.entity.setProperty(PROGRESS_PROPERTY, 0.0);
    state.entity.setProperty(STATUS_PROPERTY, "" + Status.ACTIVE);
    state.entity.setProperty(START_TIME_PROPERTY, time);
    state.entity.setUnindexedProperty(CHART_PROPERTY, new Text(""));
    state.setCounters(new Counters());
    state.setActiveShardCount(0);
    state.setShardCount(0);
    return state;
}
From source file:com.google.appengine.tools.mapreduce.MapReduceState.java
License:Apache License
/**
 * Gets the MapReduceState corresponding to the given job ID.
 *
 * @param service the datastore to use for persistence
 * @param jobId the JobID to retrieve the MapReduceState for
 * @return the corresponding MapReduceState
 * @throws EntityNotFoundException if there is no MapReduceState corresponding
 *         to the given JobID
 */
public static MapReduceState getMapReduceStateFromJobID(DatastoreService service, JobID jobId)
        throws EntityNotFoundException {
    Key key = KeyFactory.createKey("MapReduceState", jobId.toString());
    MapReduceState state = new MapReduceState(service);
    state.entity = service.get(key);
    return state;
}
From source file:com.google.appengine.tools.mapreduce.ShardState.java
License:Apache License
/**
 * Gets all shard states corresponding to a particular Job ID.
 */
public static List<ShardState> getShardStatesFromJobID(DatastoreService service, JobID jobId) {
    List<Entity> shardStateEntities = service
            .prepare(new Query("ShardState").addFilter(JOB_ID_PROPERTY, FilterOperator.EQUAL, jobId.toString()))
            .asList(FetchOptions.Builder.withLimit(1000));
    List<ShardState> shardStates = new ArrayList<ShardState>(shardStateEntities.size());
    for (Entity entity : shardStateEntities) {
        ShardState shardState = new ShardState(service);
        shardState.entity = entity;
        shardStates.add(shardState);
    }
    return shardStates;
}
From source file:com.inmobi.conduit.local.CopyMapper.java
License:Apache License
private Path getJobTmpDir(Context context, JobID jobId) {
    return new Path(new Path(context.getConfiguration().get(LOCALSTREAM_TMP_PATH)), jobId.toString());
}
From source file:com.inmobi.databus.local.CopyMapper.java
License:Apache License
private Path getJobTmpDir(Context context, JobID jobId) {
    return new Path(new Path(context.getConfiguration().get("localstream.tmp.path")), jobId.toString());
}
From source file:com.scaleoutsoftware.soss.hserver.hadoop.SubmittedJob.java
License:Apache License
SubmittedJob(JobID jobID, String jobSubmitDirectory, Credentials credentials, Configuration configuration)
        throws IOException, InterruptedException {
    this.jobID = jobID;
    this.configuration = configuration;
    this.jobSubmitDirectoryPath = new Path(jobSubmitDirectory);
    this.fileSystem = FileSystem.get(configuration);

    JobSplit.TaskSplitMetaInfo splitInfo[] = SplitMetaInfoReader.readSplitMetaInfo(jobID, fileSystem,
            configuration, jobSubmitDirectoryPath);

    Path jobSplitFile = JobSubmissionFiles.getJobSplitFile(jobSubmitDirectoryPath);
    FSDataInputStream stream = fileSystem.open(jobSplitFile);
    for (JobSplit.TaskSplitMetaInfo info : splitInfo) {
        Object split = getSplitDetails(stream, info.getStartOffset(), configuration);
        inputSplits.add(split);
        splitLocations.put(split, info.getLocations());
        LOG.info("Adding split for execution. Split = " + split + " Locations: "
                + Arrays.toString(splitLocations.get(split)));
    }
    stream.close();

    jobConfPath = JobSubmissionFiles.getJobConfPath(jobSubmitDirectoryPath);
    if (!fileSystem.exists(jobConfPath)) {
        throw new IOException("Cannot find job.xml. Path = " + jobConfPath);
    }

    // We cannot just use the JobConf(Path) constructor, because it does not
    // work for HDFS locations. The comment in Configuration#loadResource()
    // states, for the case when the Path to the resource is provided:
    // "Can't use FileSystem API or we get an infinite loop since FileSystem
    // uses Configuration API. Use java.io.File instead."
    //
    // Workaround: construct an empty Configuration, provide it with the
    // input stream, and give it to the JobConf constructor.
    FSDataInputStream jobConfStream = fileSystem.open(jobConfPath);
    Configuration jobXML = new Configuration(false);
    jobXML.addResource(jobConfStream);

    // The configuration does not actually get read until we attempt to read
    // some property. The call to #size() makes the Configuration read the
    // input stream.
    jobXML.size();

    // We are done with the input stream and can close it now.
    jobConfStream.close();

    jobConf = new JobConf(jobXML);
    newApi = jobConf.getUseNewMapper();

    jobStatus = new JobStatus(jobID, 0f, 0f, 0f, 0f, JobStatus.State.RUNNING, JobPriority.NORMAL,
            UserGroupInformation.getCurrentUser().getUserName(), jobID.toString(), jobConfPath.toString(), "");
}
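The stream-based workaround in the constructor above can be lifted into a standalone helper. A minimal sketch using only the calls already present in the example (the class and method names here are illustrative):

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.JobConf;

public class JobConfFromHdfs {
    // Load a job.xml from any FileSystem (including HDFS), avoiding the
    // JobConf(Path) constructor, which only works for local files.
    public static JobConf load(FileSystem fs, Path jobConfPath) throws IOException {
        FSDataInputStream in = fs.open(jobConfPath);
        Configuration conf = new Configuration(false); // skip default resources
        conf.addResource(in);
        conf.size(); // Configuration is lazy; force it to parse the stream now
        in.close();
        return new JobConf(conf);
    }
}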
From source file:com.scaleoutsoftware.soss.hserver.JobScheduler.java
License:Apache License
/**
 * Runs the map-reduce job on ScaleOut hServer.
 *
 * @param jobID the id of the job
 * @param jobConf the job to run
 * @param isNewApi if the job uses the new MapReduce APIs
 * @param splitType the type of the split
 * @param inputSplits the list of input splits
 * @param splitLocations the locations of the splits
 * @param grid the invocation grid to run the job
 * @throws IOException if errors occurred during the job
 * @throws InterruptedException if the processing thread is interrupted
 * @throws ClassNotFoundException if the invocation grid does not contain the dependency class
 */
@SuppressWarnings("unchecked")
public void runPredefinedJob(JobID jobID, JobConf jobConf, boolean isNewApi, Class splitType,
        List<?> inputSplits, Map<Object, String[]> splitLocations, InvocationGrid grid)
        throws IOException, InterruptedException, ClassNotFoundException {
    // Initialize user credential in advance
    long time = System.currentTimeMillis();
    CreateUserCredentials.run(grid);
    String hadoopVersion = VersionInfo.getVersion();
    int appID = 0xFFFFFFF & BitConverter.hashStringOneInt(jobID.toString());
    try {
        org.apache.hadoop.mapreduce.OutputCommitter outputCommitter = createOutputCommitter(isNewApi, jobID,
                jobConf);
        HadoopVersionSpecificCode hadoopVersionSpecificCode = HadoopVersionSpecificCode
                .getInstance(hadoopVersion, jobConf);
        org.apache.hadoop.mapred.JobContext jobContext = hadoopVersionSpecificCode.createJobContext(jobConf,
                jobID);
        outputCommitter.setupJob(jobContext);

        // Clear all temporary objects
        DataAccessor.clearObjects(appID);

        // Calculating the partition layout
        com.scaleoutsoftware.soss.client.util.HostToPartitionsMapping hostNameToPartition =
                com.scaleoutsoftware.soss.client.util.HostToPartitionsMapping.getCurrent();
        List<InetAddress> hostAddresses = new ArrayList<InetAddress>(hostNameToPartition.getHosts());

        // Generating mapping of Hadoop partitions to SOSS partitions, so they
        // are equally distributed across hosts
        int numHosts = hostAddresses.size();
        int numberOfSlotsPerNode = Math
                .max(grid != null ? grid.getMaxNumberOfCores() : Runtime.getRuntime().availableProcessors(), 1);

        // Generating split to hostname map
        Map<InetAddress, List<Integer>> splitToHostAddress = assignSplitsToHost(inputSplits, hostAddresses,
                splitLocations);

        int[] partitionMapping = hostNameToPartition.generateEvenItemDistribution(jobConf.getNumReduceTasks());

        HadoopInvocationParameters hadoopParameters = new HadoopInvocationParameters(jobConf, jobID, !isNewApi);
        HServerInvocationParameters parameters = new HServerInvocationParameters(hadoopParameters, appID,
                partitionMapping, hostNameToPartition, numberOfSlotsPerNode, splitType, inputSplits,
                splitToHostAddress, false,
                HServerParameters.getBooleanSetting(HServerParameters.SORT_KEYS, jobConf), hadoopVersion, null,
                SerializationMode.DEFAULT);

        StringBuilder stringBuilder = new StringBuilder();
        stringBuilder.append("Splits created:\n");
        for (InetAddress address : splitToHostAddress.keySet()) {
            stringBuilder.append("Host ");
            stringBuilder.append(address);
            stringBuilder.append(" has ");
            stringBuilder.append(splitToHostAddress.get(address).size());
            stringBuilder.append(" splits.\n");
        }
        System.out.println(stringBuilder.toString());

        System.out.println("Job initialization completed in " + (System.currentTimeMillis() - time) + " ms.");
        time = System.currentTimeMillis();

        InvokeResult<MapperResult> mapInvokeResult = MessagingHelper.invoke(grid,
                RunMapper.MapperInvokable.class, parameters, TimeSpan.INFINITE_TIMEOUT.getSeconds());

        if (mapInvokeResult.getErrors() != null && mapInvokeResult.getErrors().size() > 0) {
            throw new IOException("Map invocation failed.", mapInvokeResult.getErrors().get(0));
        }

        System.out.println("Map invocation done in " + (System.currentTimeMillis() - time) + " ms.");
        time = System.currentTimeMillis();

        MapperResult resultObject = mapInvokeResult.getResult();

        if (resultObject == null || mapInvokeResult.getNumFailed() != 0) {
            throw new IOException("Mapper invocation failed. Num failed = " + mapInvokeResult.getNumFailed());
        }

        if (resultObject.getNumberOfSplitsProcessed() != inputSplits.size()) {
            throw new IOException("Number of splits does not match the number of invocations. Nsplits = "
                    + inputSplits.size() + ", Ninvokes =" + resultObject.getNumberOfSplitsProcessed());
        }

        if (partitionMapping.length > 0) {
            // Running the reduce step
            InvokeResult<Integer> reduceInvokeResult = MessagingHelper.invoke(grid, ReduceInvokable.class,
                    appID, TimeSpan.INFINITE_TIMEOUT.getSeconds());

            System.out.println("Reduce invocation done in " + (System.currentTimeMillis() - time) + " ms.");

            DataAccessor.clearObjects(appID); // Clear all temporary objects

            if (reduceInvokeResult.getErrors() != null && reduceInvokeResult.getErrors().size() > 0) {
                throw new IOException("Reduce invocation failed.", reduceInvokeResult.getErrors().get(0));
            }
            if (reduceInvokeResult.getNumFailed() != 0) {
                throw new IOException("Reduce invocation failed.");
            }
            if (reduceInvokeResult.getResult() != partitionMapping.length) {
                throw new IOException("Not all partitions were reduced. Expected = " + partitionMapping.length
                        + " Actual = " + reduceInvokeResult.getResult());
            }
        }
        outputCommitter.commitJob(jobContext);
    } catch (StateServerException e) {
        throw new IOException("ScaleOut hServer access error.", e);
    }
}
From source file:com.scaleoutsoftware.soss.hserver.JobScheduler.java
License:Apache License
/**
 * Runs the map-reduce job on ScaleOut hServer.
 *
 * @param job the job to run
 * @param jobId the id of the job
 * @param sortEnabled if key sorting is enabled
 * @param jobParameter user defined parameter object for the job
 * @param grid the invocation grid to run the job
 * @throws IOException if errors occurred during the job
 * @throws InterruptedException if the processing thread is interrupted
 * @throws ClassNotFoundException if the invocation grid does not contain the dependency class
 */
@SuppressWarnings("unchecked")
public void runOldApiJob(JobConf job, org.apache.hadoop.mapred.JobID jobId, boolean sortEnabled,
        Object jobParameter, InvocationGrid grid)
        throws IOException, InterruptedException, ClassNotFoundException {
    // Initialize user credential in advance
    int jobAppId = 0xFFFFFFF & BitConverter.hashStringOneInt(jobId.toString());
    String hadoopVersion = VersionInfo.getVersion();
    long time = System.currentTimeMillis();
    CreateUserCredentials.run(grid);
    try {
        // Check output specs before running the job
        job.getOutputFormat().checkOutputSpecs(FileSystem.get(job), job);

        JobContext jContext = HadoopVersionSpecificCode.getInstance(hadoopVersion, job).createJobContext(job,
                jobId);
        org.apache.hadoop.mapred.OutputCommitter outputCommitter = job.getOutputCommitter();
        outputCommitter.setupJob(jContext);

        // Clear all temporary objects
        DataAccessor.clearObjects(jobAppId);

        // Calculating the partition layout
        com.scaleoutsoftware.soss.client.util.HostToPartitionsMapping hostNameToPartition =
                com.scaleoutsoftware.soss.client.util.HostToPartitionsMapping.getCurrent();
        List<InetAddress> hostAddresses = new ArrayList<InetAddress>(hostNameToPartition.getHosts());

        // Generating mapping of Hadoop partitions to SOSS partitions, so they
        // are equally distributed across hosts
        int numHosts = hostAddresses.size();
        int numberOfSlotsPerNode = Math
                .max(grid != null ? grid.getMaxNumberOfCores() : Runtime.getRuntime().availableProcessors(), 1);

        // Set the number of splits to the number of cores
        if (NamedMapInputFormatMapred.class.isAssignableFrom(job.getInputFormat().getClass())) {
            int numberOfSplits = HServerParameters.getSetting(MAP_SPLITS_PER_CORE, job) * numHosts
                    * numberOfSlotsPerNode;
            job.setNumMapTasks(Math.min(numberOfSplits, HServerConstants.MAX_MAP_REDUCE_TASKS));
        }

        // Generating split to hostname map
        org.apache.hadoop.mapred.InputFormat inputFormat = job.getInputFormat();
        List<org.apache.hadoop.mapred.InputSplit> splitList = Arrays
                .asList(inputFormat.getSplits(job, job.getNumMapTasks()));
        Map<InetAddress, List<Integer>> splitToHostAddress = assignSplitsToHost(splitList, hostAddresses, null);

        // Choose the optimal number of reducers for GridOutputFormat
        if (job.getOutputFormat() instanceof NamedMapOutputFormatMapred) {
            job.setNumReduceTasks(numHosts * numberOfSlotsPerNode);
            sortEnabled = false;
        }

        int[] partitionMapping = hostNameToPartition.generateEvenItemDistribution(job.getNumReduceTasks());

        // Generating invocation parameters
        Class<? extends org.apache.hadoop.mapred.InputSplit> splitType = splitList.size() > 0
                ? splitList.get(0).getClass()
                : null;

        HadoopInvocationParameters hadoopParameters = new HadoopInvocationParameters(job, jobId, true);
        HServerInvocationParameters<org.apache.hadoop.mapred.InputSplit> parameters =
                new HServerInvocationParameters<org.apache.hadoop.mapred.InputSplit>(hadoopParameters,
                        jobAppId, partitionMapping, hostNameToPartition, numberOfSlotsPerNode, splitType,
                        splitList, splitToHostAddress, false, sortEnabled, hadoopVersion, jobParameter,
                        SerializationMode.DEFAULT);

        StringBuilder stringBuilder = new StringBuilder();
        stringBuilder.append("Splits created:\n");
        for (InetAddress address : splitToHostAddress.keySet()) {
            stringBuilder.append("Host ");
            stringBuilder.append(address);
            stringBuilder.append(" has ");
            stringBuilder.append(splitToHostAddress.get(address).size());
            stringBuilder.append(" splits.\n");
        }
        System.out.println(stringBuilder.toString());

        System.out.println("Job initialization completed in " + (System.currentTimeMillis() - time) + " ms.");
        time = System.currentTimeMillis();

        InvokeResult<MapperResult> mapInvokeResult = MessagingHelper.invoke(grid,
                RunMapper.MapperInvokable.class, parameters, TimeSpan.INFINITE_TIMEOUT.getSeconds());

        if (mapInvokeResult.getErrors() != null && mapInvokeResult.getErrors().size() > 0) {
            throw new IOException("Map invocation failed.", mapInvokeResult.getErrors().get(0));
        }

        System.out.println("Map invocation done in " + (System.currentTimeMillis() - time) + " ms.");
        time = System.currentTimeMillis();

        MapperResult resultObject = mapInvokeResult.getResult();

        if (resultObject == null || mapInvokeResult.getNumFailed() != 0) {
            throw new IOException("Mapper invocation failed. Num failed = " + mapInvokeResult.getNumFailed());
        }

        if (resultObject.getNumberOfSplitsProcessed() != splitList.size()) {
            throw new IOException("Number of splits does not match the number of invocations. Nsplits = "
                    + splitList.size() + ", Ninvokes =" + resultObject.getNumberOfSplitsProcessed());
        }

        if (partitionMapping.length > 0) {
            // Running the reduce step
            InvokeResult<Integer> reduceInvokeResult = MessagingHelper.invoke(grid, ReduceInvokable.class,
                    jobAppId, TimeSpan.INFINITE_TIMEOUT.getSeconds());

            System.out.println("Reduce invocation done in " + (System.currentTimeMillis() - time) + " ms.");

            DataAccessor.clearObjects(jobAppId); // Clear all temporary objects

            if (reduceInvokeResult.getErrors() != null && reduceInvokeResult.getErrors().size() > 0) {
                throw new IOException("Reduce invocation failed.", reduceInvokeResult.getErrors().get(0));
            }
            if (reduceInvokeResult.getNumFailed() != 0) {
                throw new IOException("Reduce invocation failed.");
            }
            if (reduceInvokeResult.getResult() != partitionMapping.length) {
                throw new IOException("Not all partitions were reduced. Expected = " + partitionMapping.length
                        + " Actual = " + reduceInvokeResult.getResult());
            }
        }
        outputCommitter.commitJob(jContext);
    } catch (StateServerException e) {
        throw new IOException("ScaleOut hServer access error.", e);
    }
}
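A note on the id derivation used by both scheduler methods above: jobID.toString() is hashed and masked with 0xFFFFFFF, yielding a non-negative 28-bit app id that is stable for a given job id string. A hedged analogue (BitConverter.hashStringOneInt is ScaleOut-specific; String.hashCode stands in here):

// Sketch only: masking with 0xFFFFFFF clears the sign bit,
// so the derived app id is always >= 0.
int appId = 0xFFFFFFF & "job_foo_0001".hashCode();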