List of usage examples for the `org.apache.spark.api.java.JavaFutureAction#jobIds()` method
List<Integer> jobIds();
From source file:cn.com.bsfit.frms.spark.StatusTrackerDemo.java
License:Apache License
public static void main(String[] args) throws Exception { SparkSession spark = SparkSession.builder().appName(APP_NAME).getOrCreate(); final JavaSparkContext jsc = new JavaSparkContext(spark.sparkContext()); // Example of implementing a progress reporter for a simple job. JavaRDD<Integer> rdd = jsc.parallelize(Arrays.asList(1, 2, 3, 4, 5), 5) .map(new IdentityWithDelay<Integer>()); JavaFutureAction<List<Integer>> jobFuture = rdd.collectAsync(); while (!jobFuture.isDone()) { Thread.sleep(1000); // 1 second List<Integer> jobIds = jobFuture.jobIds(); if (jobIds.isEmpty()) { continue; }//from w w w. j a v a2s .c o m int currentJobId = jobIds.get(jobIds.size() - 1); SparkJobInfo jobInfo = jsc.statusTracker().getJobInfo(currentJobId); SparkStageInfo stageInfo = jsc.statusTracker().getStageInfo(jobInfo.stageIds()[0]); System.out.println(stageInfo.numTasks() + " tasks total: " + stageInfo.numActiveTasks() + " active, " + stageInfo.numCompletedTasks() + " complete"); } System.out.println("Job results are: " + jobFuture.get()); jsc.close(); spark.stop(); }
From source file:com.andado.spark.examples.JavaStatusTrackerDemo.java
License:Apache License
public static void main(String[] args) throws Exception { SparkSession spark = SparkSession.builder().appName(APP_NAME).getOrCreate(); final JavaSparkContext jsc = new JavaSparkContext(spark.sparkContext()); // Example of implementing a progress reporter for a simple job. JavaRDD<Integer> rdd = jsc.parallelize(Arrays.asList(1, 2, 3, 4, 5), 5) .map(new IdentityWithDelay<Integer>()); JavaFutureAction<List<Integer>> jobFuture = rdd.collectAsync(); while (!jobFuture.isDone()) { Thread.sleep(1000); // 1 second List<Integer> jobIds = jobFuture.jobIds(); if (jobIds.isEmpty()) { continue; }// w w w . j a v a 2s . c om int currentJobId = jobIds.get(jobIds.size() - 1); SparkJobInfo jobInfo = jsc.statusTracker().getJobInfo(currentJobId); SparkStageInfo stageInfo = jsc.statusTracker().getStageInfo(jobInfo.stageIds()[0]); System.out.println(stageInfo.numTasks() + " tasks total: " + stageInfo.numActiveTasks() + " active, " + stageInfo.numCompletedTasks() + " complete"); } System.out.println("Job results are: " + jobFuture.get()); spark.stop(); }
From source file:com.cloudera.livy.client.local.driver.BypassJobWrapper.java
License:Apache License
@Override protected synchronized void jobSubmitted(JavaFutureAction<?> job) { for (Integer i : job.jobIds()) { recordNewJob(i);//from ww w .jav a 2 s .c o m } }
From source file:com.cloudera.livy.client.local.driver.JobWrapper.java
License:Apache License
/**
 * Reports whether any of the tracked Spark actions was assigned the given job id.
 *
 * @param sparkId Spark-assigned job id to look for
 * @return true if some tracked future lists {@code sparkId} among its job ids
 */
boolean hasSparkJobId(Integer sparkId) {
    boolean found = false;
    for (JavaFutureAction<?> trackedFuture : sparkJobs) {
        if (trackedFuture.jobIds().contains(sparkId)) {
            found = true;
            break;
        }
    }
    return found;
}
From source file:com.cloudera.livy.client.local.driver.JobWrapper.java
License:Apache License
/**
 * Notifies the client that the action was submitted, reporting only the
 * first Spark job id of the action.
 * NOTE(review): jobIds() may be empty immediately after submission —
 * get(0) would then throw; behavior kept as in the original.
 */
protected void jobSubmitted(JavaFutureAction<?> job) {
    Integer firstSparkJobId = job.jobIds().get(0);
    client.jobSubmitted(jobId, firstSparkJobId);
}
From source file:com.dmall.order.realtime.tacking.function.JavaStatusTrackerDemo.java
License:Apache License
public static void main(String[] args) throws Exception { SparkSession spark = SparkSession.builder().appName(APP_NAME).master("spark://192.168.184.128:7077") .getOrCreate();//from w ww.j a v a2s.c o m final JavaSparkContext jsc = new JavaSparkContext(spark.sparkContext()); jsc.addJar("D:\\learn\\java\\learn-spark\\target\\spark.jar"); // Example of implementing a progress reporter for a simple job. JavaRDD<Integer> rdd = jsc.parallelize(Arrays.asList(1, 2, 3, 4, 5), 5) .map(new IdentityWithDelay<Integer>()); JavaFutureAction<List<Integer>> jobFuture = rdd.collectAsync(); while (!jobFuture.isDone()) { Thread.sleep(1000); // 1 second List<Integer> jobIds = jobFuture.jobIds(); if (jobIds.isEmpty()) { continue; } int currentJobId = jobIds.get(jobIds.size() - 1); SparkJobInfo jobInfo = jsc.statusTracker().getJobInfo(currentJobId); SparkStageInfo stageInfo = jsc.statusTracker().getStageInfo(jobInfo.stageIds()[0]); System.out.println(stageInfo.numTasks() + " tasks total: " + stageInfo.numActiveTasks() + " active, " + stageInfo.numCompletedTasks() + " complete"); } System.out.println("Job results are: " + jobFuture.get()); spark.stop(); }
From source file:com.hxr.bigdata.spark.example141.JavaStatusTrackerDemo.java
License:Apache License
public static void main(String[] args) throws Exception { SparkConf sparkConf = new SparkConf().setAppName(APP_NAME); final JavaSparkContext sc = new JavaSparkContext(sparkConf); // Example of implementing a progress reporter for a simple job. JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 2, 3, 4, 5), 5) .map(new IdentityWithDelay<Integer>()); JavaFutureAction<List<Integer>> jobFuture = rdd.collectAsync(); while (!jobFuture.isDone()) { Thread.sleep(1000); // 1 second List<Integer> jobIds = jobFuture.jobIds(); if (jobIds.isEmpty()) { continue; }/*from ww w . j ava 2s.c o m*/ int currentJobId = jobIds.get(jobIds.size() - 1); SparkJobInfo jobInfo = sc.statusTracker().getJobInfo(currentJobId); SparkStageInfo stageInfo = sc.statusTracker().getStageInfo(jobInfo.stageIds()[0]); System.out.println(stageInfo.numTasks() + " tasks total: " + stageInfo.numActiveTasks() + " active, " + stageInfo.numCompletedTasks() + " complete"); } System.out.println("Job results are: " + jobFuture.get()); sc.stop(); }
From source file:com.splicemachine.olap.CompactionJob.java
License:Apache License
/**
 * Runs an HBase region compaction as a Spark job, polling for completion while
 * honoring client-side cancellation/timeout signals carried by {@code status}.
 * Returns null in all cases; the outcome is communicated via
 * {@code status.markCompleted(...)} or by simply returning after cancellation.
 */
@Override
public Void call() throws Exception {
    if (!status.markRunning()) {
        // The client has already cancelled us or has died before we could get started, so stop now.
        return null;
    }
    initializeJob();
    Configuration conf = new Configuration(HConfiguration.unwrapDelegate());
    if (LOG.isTraceEnabled()) {
        LOG.trace("regionLocation = " + compactionRequest.regionLocation);
    }
    // Pass region location and the (base64-encoded) file list to the input format via the job conf.
    conf.set(MRConstants.REGION_LOCATION, compactionRequest.regionLocation);
    conf.set(MRConstants.COMPACTION_FILES, getCompactionFilesBase64String());
    SpliceSpark.pushScope(compactionRequest.scope + ": Parallelize");
    //JavaRDD rdd1 = SpliceSpark.getContext().parallelize(files, 1);
    //ParallelCollectionRDD rdd1 = getCompactionRDD();
    JavaSparkContext context = SpliceSpark.getContext();
    // CompactionInputFormat drives where the compaction work runs; see conf keys set above.
    JavaPairRDD<Integer, Iterator> rdd1 = context.newAPIHadoopRDD(conf, CompactionInputFormat.class,
            Integer.class, Iterator.class);
    rdd1.setName("Distribute Compaction Load");
    SpliceSpark.popScope();
    SpliceSpark.pushScope(compactionRequest.scope + ": Compact files");
    JavaRDD<String> rdd2 = rdd1.mapPartitions(new SparkFlatMapFunction<>(compactionRequest.compactionFunction));
    rdd2.setName(compactionRequest.jobDetails);
    SpliceSpark.popScope();
    SpliceSpark.pushScope("Compaction");
    if (!status.isRunning()) {
        // The client timed out during our setup, so it's time to stop.
        return null;
    }
    long startTime = clock.currentTimeMillis();
    // Submit asynchronously so we can poll for cancellation between ticks.
    JavaFutureAction<List<String>> collectFuture = rdd2.collectAsync();
    while (!collectFuture.isDone()) {
        try {
            collectFuture.get(tickTime, TimeUnit.MILLISECONDS);
        } catch (TimeoutException te) {
            /*
             * A TimeoutException just means that tickTime expired. That's okay, we just stick our
             * head up and make sure that the client is still operating.
             */
        }
        if (!status.isRunning()) {
            // The client timed out, so cancel the compaction and terminate.
            collectFuture.cancel(true);
            context.cancelJobGroup(compactionRequest.jobGroup);
            return null;
        }
        if (clock.currentTimeMillis() - startTime > compactionRequest.maxWait) {
            // Make sure compaction is scheduled in Spark and running; otherwise cancel it and
            // fall back to in-HBase compaction.
            if (!compactionRunning(collectFuture.jobIds())) {
                collectFuture.cancel(true);
                context.cancelJobGroup(compactionRequest.jobGroup);
                status.markCompleted(new FailedOlapResult(new RejectedExecutionException(
                        "No resources available for running compaction in Spark")));
                return null;
            }
        }
    }
    // The compaction completed: report the resulting store-file paths to the client.
    List<String> sPaths = collectFuture.get();
    status.markCompleted(new CompactionResult(sPaths));
    SpliceSpark.popScope();
    if (LOG.isTraceEnabled())
        SpliceLogUtils.trace(LOG, "Paths Returned: %s", sPaths);
    return null;
}
From source file:gtl.spark.java.example.apache.JavaStatusTrackerDemo.java
License:Apache License
public static void main(String[] args) throws Exception { SparkSession spark = SparkSession.builder().appName(APP_NAME).getOrCreate(); JavaSparkContext jsc = new JavaSparkContext(spark.sparkContext()); // Example of implementing a progress reporter for a simple job. JavaRDD<Integer> rdd = jsc.parallelize(Arrays.asList(1, 2, 3, 4, 5), 5).map(new IdentityWithDelay<>()); JavaFutureAction<List<Integer>> jobFuture = rdd.collectAsync(); while (!jobFuture.isDone()) { Thread.sleep(1000); // 1 second List<Integer> jobIds = jobFuture.jobIds(); if (jobIds.isEmpty()) { continue; }/*w ww . j av a 2 s . c o m*/ int currentJobId = jobIds.get(jobIds.size() - 1); SparkJobInfo jobInfo = jsc.statusTracker().getJobInfo(currentJobId); SparkStageInfo stageInfo = jsc.statusTracker().getStageInfo(jobInfo.stageIds()[0]); System.out.println(stageInfo.numTasks() + " tasks total: " + stageInfo.numActiveTasks() + " active, " + stageInfo.numCompletedTasks() + " complete"); } System.out.println("Job results are: " + jobFuture.get()); spark.stop(); }
From source file:org.apache.hadoop.hive.ql.exec.spark.LocalHiveSparkClient.java
License:Apache License
@Override public SparkJobRef execute(DriverContext driverContext, SparkWork sparkWork) throws Exception { Context ctx = driverContext.getCtx(); HiveConf hiveConf = (HiveConf) ctx.getConf(); refreshLocalResources(sparkWork, hiveConf); JobConf jobConf = new JobConf(hiveConf); // Create temporary scratch dir Path emptyScratchDir;//from ww w. ja v a 2s.c om emptyScratchDir = ctx.getMRTmpPath(); FileSystem fs = emptyScratchDir.getFileSystem(jobConf); fs.mkdirs(emptyScratchDir); SparkCounters sparkCounters = new SparkCounters(sc); Map<String, List<String>> prefixes = sparkWork.getRequiredCounterPrefix(); if (prefixes != null) { for (String group : prefixes.keySet()) { for (String counterName : prefixes.get(group)) { sparkCounters.createCounter(group, counterName); } } } SparkReporter sparkReporter = new SparkReporter(sparkCounters); // Generate Spark plan SparkPlanGenerator gen = new SparkPlanGenerator(sc, ctx, jobConf, emptyScratchDir, sparkReporter); SparkPlan plan = gen.generate(sparkWork); // Execute generated plan. JavaPairRDD<HiveKey, BytesWritable> finalRDD = plan.generateGraph(); // We use Spark RDD async action to submit job as it's the only way to get jobId now. JavaFutureAction<Void> future = finalRDD.foreachAsync(HiveVoidFunction.getInstance()); // As we always use foreach action to submit RDD graph, it would only trigger one job. int jobId = future.jobIds().get(0); LocalSparkJobStatus sparkJobStatus = new LocalSparkJobStatus(sc, jobId, jobMetricsListener, sparkCounters, plan.getCachedRDDIds(), future); return new LocalSparkJobRef(Integer.toString(jobId), hiveConf, sparkJobStatus, sc); }