Example usage for org.apache.spark.api.java JavaFutureAction jobIds

List of usage examples for org.apache.spark.api.java JavaFutureAction jobIds

Introduction

On this page you can find example usage for org.apache.spark.api.java JavaFutureAction jobIds.

Prototype

List<Integer> jobIds();

Source Link

Document

Returns the job IDs run by the underlying async operation.

Usage

From source file:cn.com.bsfit.frms.spark.StatusTrackerDemo.java

License:Apache License

public static void main(String[] args) throws Exception {
    SparkSession spark = SparkSession.builder().appName(APP_NAME).getOrCreate();

    final JavaSparkContext jsc = new JavaSparkContext(spark.sparkContext());

    // Kick off an asynchronous collect of a deliberately slow RDD so we can
    // poll the status tracker while the job is still running.
    JavaRDD<Integer> rdd = jsc.parallelize(Arrays.asList(1, 2, 3, 4, 5), 5)
            .map(new IdentityWithDelay<Integer>());
    JavaFutureAction<List<Integer>> jobFuture = rdd.collectAsync();
    while (!jobFuture.isDone()) {
        Thread.sleep(1000); // poll once per second
        List<Integer> submittedJobIds = jobFuture.jobIds();
        if (submittedJobIds.isEmpty()) {
            // The action has not been scheduled as a Spark job yet.
            continue;
        }
        int latestJobId = submittedJobIds.get(submittedJobIds.size() - 1);
        SparkJobInfo jobInfo = jsc.statusTracker().getJobInfo(latestJobId);
        SparkStageInfo stageInfo = jsc.statusTracker().getStageInfo(jobInfo.stageIds()[0]);
        System.out.println(stageInfo.numTasks() + " tasks total: " + stageInfo.numActiveTasks()
                + " active, " + stageInfo.numCompletedTasks() + " complete");
    }

    System.out.println("Job results are: " + jobFuture.get());
    jsc.close();
    spark.stop();
}

From source file:com.andado.spark.examples.JavaStatusTrackerDemo.java

License:Apache License

public static void main(String[] args) throws Exception {
    SparkSession spark = SparkSession.builder().appName(APP_NAME).getOrCreate();

    final JavaSparkContext jsc = new JavaSparkContext(spark.sparkContext());

    // Progress reporter example: run a slow job asynchronously and report
    // stage progress once per second until it completes.
    JavaRDD<Integer> delayed = jsc.parallelize(Arrays.asList(1, 2, 3, 4, 5), 5)
            .map(new IdentityWithDelay<Integer>());
    JavaFutureAction<List<Integer>> future = delayed.collectAsync();
    while (!future.isDone()) {
        Thread.sleep(1000); // 1 second between polls
        List<Integer> ids = future.jobIds();
        if (ids.isEmpty()) {
            continue; // job not scheduled yet
        }
        int currentJobId = ids.get(ids.size() - 1);
        SparkJobInfo jobInfo = jsc.statusTracker().getJobInfo(currentJobId);
        SparkStageInfo stageInfo = jsc.statusTracker().getStageInfo(jobInfo.stageIds()[0]);
        System.out.println(stageInfo.numTasks() + " tasks total: " + stageInfo.numActiveTasks()
                + " active, " + stageInfo.numCompletedTasks() + " complete");
    }

    System.out.println("Job results are: " + future.get());
    spark.stop();
}

From source file:com.cloudera.livy.client.local.driver.BypassJobWrapper.java

License:Apache License

@Override
protected synchronized void jobSubmitted(JavaFutureAction<?> job) {
    // Record every Spark job id spawned by this bypass job's future.
    job.jobIds().forEach(this::recordNewJob);
}

From source file:com.cloudera.livy.client.local.driver.JobWrapper.java

License:Apache License

boolean hasSparkJobId(Integer sparkId) {
    // True when any future launched for this job reported the given Spark job id.
    boolean matched = false;
    for (JavaFutureAction<?> action : sparkJobs) {
        if (action.jobIds().contains(sparkId)) {
            matched = true;
            break;
        }
    }
    return matched;
}

From source file:com.cloudera.livy.client.local.driver.JobWrapper.java

License:Apache License

protected void jobSubmitted(JavaFutureAction<?> job) {
    // Forward the first Spark job id triggered by this action to the client.
    Integer firstSparkJobId = job.jobIds().get(0);
    client.jobSubmitted(jobId, firstSparkJobId);
}

From source file:com.dmall.order.realtime.tacking.function.JavaStatusTrackerDemo.java

License:Apache License

public static void main(String[] args) throws Exception {
    // NOTE(review): the master URL and jar path below are hard-coded for one
    // developer machine; parameterize them before reusing this demo.
    SparkSession spark = SparkSession.builder().appName(APP_NAME).master("spark://192.168.184.128:7077")
            .getOrCreate();

    final JavaSparkContext jsc = new JavaSparkContext(spark.sparkContext());
    jsc.addJar("D:\\learn\\java\\learn-spark\\target\\spark.jar");

    // Run a slow job asynchronously and print per-stage progress while it runs.
    JavaRDD<Integer> rdd = jsc.parallelize(Arrays.asList(1, 2, 3, 4, 5), 5)
            .map(new IdentityWithDelay<Integer>());
    JavaFutureAction<List<Integer>> jobFuture = rdd.collectAsync();
    while (!jobFuture.isDone()) {
        Thread.sleep(1000); // poll interval: 1 second
        List<Integer> jobIds = jobFuture.jobIds();
        if (jobIds.isEmpty()) {
            continue; // not scheduled yet
        }
        int newestJobId = jobIds.get(jobIds.size() - 1);
        SparkJobInfo jobInfo = jsc.statusTracker().getJobInfo(newestJobId);
        SparkStageInfo stageInfo = jsc.statusTracker().getStageInfo(jobInfo.stageIds()[0]);
        System.out.println(stageInfo.numTasks() + " tasks total: " + stageInfo.numActiveTasks()
                + " active, " + stageInfo.numCompletedTasks() + " complete");
    }

    System.out.println("Job results are: " + jobFuture.get());
    spark.stop();
}

From source file:com.hxr.bigdata.spark.example141.JavaStatusTrackerDemo.java

License:Apache License

public static void main(String[] args) throws Exception {
    SparkConf sparkConf = new SparkConf().setAppName(APP_NAME);
    final JavaSparkContext sc = new JavaSparkContext(sparkConf);

    // Launch an async collect and watch its progress via the status tracker.
    JavaRDD<Integer> slowRdd = sc.parallelize(Arrays.asList(1, 2, 3, 4, 5), 5)
            .map(new IdentityWithDelay<Integer>());
    JavaFutureAction<List<Integer>> collectFuture = slowRdd.collectAsync();
    while (!collectFuture.isDone()) {
        Thread.sleep(1000); // check once per second
        List<Integer> ids = collectFuture.jobIds();
        if (ids.isEmpty()) {
            continue; // nothing submitted yet
        }
        int currentJobId = ids.get(ids.size() - 1);
        SparkJobInfo jobInfo = sc.statusTracker().getJobInfo(currentJobId);
        SparkStageInfo stageInfo = sc.statusTracker().getStageInfo(jobInfo.stageIds()[0]);
        System.out.println(stageInfo.numTasks() + " tasks total: " + stageInfo.numActiveTasks()
                + " active, " + stageInfo.numCompletedTasks() + " complete");
    }

    System.out.println("Job results are: " + collectFuture.get());
    sc.stop();
}

From source file:com.splicemachine.olap.CompactionJob.java

License:Apache License

@Override
public Void call() throws Exception {
    if (!status.markRunning()) {
        //the client has already cancelled us or has died before we could get started, so stop now
        return null;
    }/*from  w  w w .  ja  va2s. com*/
    initializeJob();
    Configuration conf = new Configuration(HConfiguration.unwrapDelegate());
    if (LOG.isTraceEnabled()) {
        LOG.trace("regionLocation = " + compactionRequest.regionLocation);
    }
    conf.set(MRConstants.REGION_LOCATION, compactionRequest.regionLocation);
    conf.set(MRConstants.COMPACTION_FILES, getCompactionFilesBase64String());

    SpliceSpark.pushScope(compactionRequest.scope + ": Parallelize");
    //JavaRDD rdd1 = SpliceSpark.getContext().parallelize(files, 1);
    //ParallelCollectionRDD rdd1 = getCompactionRDD();

    JavaSparkContext context = SpliceSpark.getContext();
    JavaPairRDD<Integer, Iterator> rdd1 = context.newAPIHadoopRDD(conf, CompactionInputFormat.class,
            Integer.class, Iterator.class);
    rdd1.setName("Distribute Compaction Load");
    SpliceSpark.popScope();

    SpliceSpark.pushScope(compactionRequest.scope + ": Compact files");
    JavaRDD<String> rdd2 = rdd1.mapPartitions(new SparkFlatMapFunction<>(compactionRequest.compactionFunction));
    rdd2.setName(compactionRequest.jobDetails);
    SpliceSpark.popScope();

    SpliceSpark.pushScope("Compaction");
    if (!status.isRunning()) {
        //the client timed out during our setup, so it's time to stop
        return null;
    }
    long startTime = clock.currentTimeMillis();
    JavaFutureAction<List<String>> collectFuture = rdd2.collectAsync();
    while (!collectFuture.isDone()) {
        try {
            collectFuture.get(tickTime, TimeUnit.MILLISECONDS);
        } catch (TimeoutException te) {
            /*
             * A TimeoutException just means that tickTime expired. That's okay, we just stick our
             * head up and make sure that the client is still operating
             */
        }
        if (!status.isRunning()) {
            /*
             * The client timed out, so cancel the compaction and terminate
             */
            collectFuture.cancel(true);
            context.cancelJobGroup(compactionRequest.jobGroup);
            return null;
        }
        if (clock.currentTimeMillis() - startTime > compactionRequest.maxWait) {
            // Make sure compaction is scheduled in Spark and running, otherwise cancel it and fallback to in-HBase compaction
            if (!compactionRunning(collectFuture.jobIds())) {
                collectFuture.cancel(true);
                context.cancelJobGroup(compactionRequest.jobGroup);
                status.markCompleted(new FailedOlapResult(new RejectedExecutionException(
                        "No resources available for running compaction in Spark")));
                return null;
            }
        }
    }
    //the compaction completed
    List<String> sPaths = collectFuture.get();
    status.markCompleted(new CompactionResult(sPaths));
    SpliceSpark.popScope();

    if (LOG.isTraceEnabled())
        SpliceLogUtils.trace(LOG, "Paths Returned: %s", sPaths);
    return null;
}

From source file:gtl.spark.java.example.apache.JavaStatusTrackerDemo.java

License:Apache License

public static void main(String[] args) throws Exception {
    SparkSession spark = SparkSession.builder().appName(APP_NAME).getOrCreate();

    JavaSparkContext jsc = new JavaSparkContext(spark.sparkContext());

    // Example progress reporter: poll the status tracker while an async
    // collect of a deliberately slow RDD is in flight.
    JavaRDD<Integer> rdd = jsc.parallelize(Arrays.asList(1, 2, 3, 4, 5), 5).map(new IdentityWithDelay<>());
    JavaFutureAction<List<Integer>> pending = rdd.collectAsync();
    while (!pending.isDone()) {
        Thread.sleep(1000); // sleep one second between status checks
        List<Integer> startedJobs = pending.jobIds();
        if (startedJobs.isEmpty()) {
            continue; // no Spark job registered yet
        }
        int currentJobId = startedJobs.get(startedJobs.size() - 1);
        SparkJobInfo jobInfo = jsc.statusTracker().getJobInfo(currentJobId);
        SparkStageInfo stageInfo = jsc.statusTracker().getStageInfo(jobInfo.stageIds()[0]);
        System.out.println(stageInfo.numTasks() + " tasks total: " + stageInfo.numActiveTasks()
                + " active, " + stageInfo.numCompletedTasks() + " complete");
    }

    System.out.println("Job results are: " + pending.get());
    spark.stop();
}

From source file:org.apache.hadoop.hive.ql.exec.spark.LocalHiveSparkClient.java

License:Apache License

@Override
public SparkJobRef execute(DriverContext driverContext, SparkWork sparkWork) throws Exception {
    Context ctx = driverContext.getCtx();
    HiveConf hiveConf = (HiveConf) ctx.getConf();
    refreshLocalResources(sparkWork, hiveConf);
    JobConf jobConf = new JobConf(hiveConf);

    // Create temporary scratch dir
    Path emptyScratchDir;//from  ww  w. ja v a 2s.c  om
    emptyScratchDir = ctx.getMRTmpPath();
    FileSystem fs = emptyScratchDir.getFileSystem(jobConf);
    fs.mkdirs(emptyScratchDir);

    SparkCounters sparkCounters = new SparkCounters(sc);
    Map<String, List<String>> prefixes = sparkWork.getRequiredCounterPrefix();
    if (prefixes != null) {
        for (String group : prefixes.keySet()) {
            for (String counterName : prefixes.get(group)) {
                sparkCounters.createCounter(group, counterName);
            }
        }
    }
    SparkReporter sparkReporter = new SparkReporter(sparkCounters);

    // Generate Spark plan
    SparkPlanGenerator gen = new SparkPlanGenerator(sc, ctx, jobConf, emptyScratchDir, sparkReporter);
    SparkPlan plan = gen.generate(sparkWork);

    // Execute generated plan.
    JavaPairRDD<HiveKey, BytesWritable> finalRDD = plan.generateGraph();
    // We use Spark RDD async action to submit job as it's the only way to get jobId now.
    JavaFutureAction<Void> future = finalRDD.foreachAsync(HiveVoidFunction.getInstance());
    // As we always use foreach action to submit RDD graph, it would only trigger one job.
    int jobId = future.jobIds().get(0);
    LocalSparkJobStatus sparkJobStatus = new LocalSparkJobStatus(sc, jobId, jobMetricsListener, sparkCounters,
            plan.getCachedRDDIds(), future);
    return new LocalSparkJobRef(Integer.toString(jobId), hiveConf, sparkJobStatus, sc);
}