Java tutorial: HadoopTechnologyService - the Aleph2/Hadoop analytic technology bridge
/*******************************************************************************
 * Copyright 2015, The IKANOW Open Source Project.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 ******************************************************************************/
package com.ikanow.aleph2.analytics.hadoop.services;

import java.io.File;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.Optional;
import java.util.concurrent.CompletableFuture;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Cluster;
import org.apache.hadoop.mapreduce.Job;

import com.google.inject.Module;
import com.ikanow.aleph2.analytics.hadoop.utils.HadoopTechnologyUtils;
import com.ikanow.aleph2.analytics.hadoop.utils.HadoopErrorUtils;
import com.ikanow.aleph2.data_model.interfaces.data_analytics.IAnalyticsContext;
import com.ikanow.aleph2.data_model.interfaces.data_analytics.IAnalyticsTechnologyService;
import com.ikanow.aleph2.data_model.interfaces.shared_services.IExtraDependencyLoader;
import com.ikanow.aleph2.data_model.objects.data_analytics.AnalyticThreadJobBean;
import com.ikanow.aleph2.data_model.objects.data_analytics.AnalyticThreadTriggerBean.AnalyticThreadComplexTriggerBean;
import com.ikanow.aleph2.data_model.objects.data_import.BucketDiffBean;
import com.ikanow.aleph2.data_model.objects.data_import.DataBucketBean;
import com.ikanow.aleph2.data_model.objects.shared.BasicMessageBean;
import com.ikanow.aleph2.data_model.objects.shared.ProcessingTestSpecBean;
import com.ikanow.aleph2.data_model.utils.BeanTemplateUtils;
import com.ikanow.aleph2.data_model.utils.BucketUtils;
import com.ikanow.aleph2.data_model.utils.ErrorUtils;
import com.ikanow.aleph2.data_model.utils.FutureUtils;
import com.ikanow.aleph2.data_model.utils.FutureUtils.ManagementFuture;
import com.ikanow.aleph2.data_model.utils.Lambdas;
import com.ikanow.aleph2.data_model.utils.SetOnce;

import fj.data.Validation;

/** Hadoop analytic technology module - provides the interface between Hadoop and Aleph2
 * @author Alex
 */
public class HadoopTechnologyService implements IAnalyticsTechnologyService, IExtraDependencyLoader {

	protected SetOnce<Configuration> _config = new SetOnce<>();
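	// SetOnce is Aleph2's write-once holder: once a value has been stored, subsequent
	// trySet calls leave it unchanged, so repeated onInit invocations can't clobber an
	// already-loaded Hadoop Configuration. A hedged sketch of the pattern (not part of
	// the original class):
	//   final SetOnce<String> once = new SetOnce<>();
	//   once.trySet("first");
	//   once.trySet("second"); // no effect - "first" is retained
	//   once.get();            // "first"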
	/** This service needs to load some additional classes via Guice.
	 *  Here's the module that defines the bindings
	 * @return
	 */
	public static List<Module> getExtraDependencyModules() {
		return Collections.emptyList();
	}

	@Override
	public void youNeedToImplementTheStaticFunctionCalled_getExtraDependencyModules() {
		//(done see above)
	}

	@Override
	public void onInit(IAnalyticsContext context) {
		try {
			if (!_config.isSet()) {
				_config.trySet(HadoopTechnologyUtils.getHadoopConfig(context.getServiceContext().getGlobalProperties()));
			}
		}
		catch (Throwable t) {}
	}

	/* (non-Javadoc)
	 * @see com.ikanow.aleph2.data_model.interfaces.data_analytics.IAnalyticsTechnologyModule#canRunOnThisNode(com.ikanow.aleph2.data_model.objects.data_import.DataBucketBean, java.util.Collection, com.ikanow.aleph2.data_model.interfaces.data_analytics.IAnalyticsContext)
	 */
	@Override
	public boolean canRunOnThisNode(DataBucketBean analytic_bucket, Collection<AnalyticThreadJobBean> jobs, IAnalyticsContext context) {
		// Here's a simple check if someone's made a token effort to install Hadoop:
		try {
			File hadoop_installed = new File(context.getServiceContext().getGlobalProperties().local_yarn_config_dir() + File.separator + "core-site.xml");
			return hadoop_installed.exists();
		}
		catch (Throwable t) {
			return false;
		}
	}

	/* (non-Javadoc)
	 * @see com.ikanow.aleph2.data_model.interfaces.data_analytics.IAnalyticsTechnologyModule#onNewThread(com.ikanow.aleph2.data_model.objects.data_import.DataBucketBean, java.util.Collection, com.ikanow.aleph2.data_model.interfaces.data_analytics.IAnalyticsContext, boolean)
	 */
	@Override
	public CompletableFuture<BasicMessageBean> onNewThread(DataBucketBean new_analytic_bucket, Collection<AnalyticThreadJobBean> jobs, IAnalyticsContext context, boolean enabled) {
		return CompletableFuture.completedFuture(HadoopTechnologyUtils.validateJobs(new_analytic_bucket, jobs));
	}

	/* (non-Javadoc)
	 * @see com.ikanow.aleph2.data_model.interfaces.data_analytics.IAnalyticsTechnologyModule#onUpdatedThread(com.ikanow.aleph2.data_model.objects.data_import.DataBucketBean, com.ikanow.aleph2.data_model.objects.data_import.DataBucketBean, java.util.Collection, boolean, java.util.Optional, com.ikanow.aleph2.data_model.interfaces.data_analytics.IAnalyticsContext)
	 */
	@Override
	public CompletableFuture<BasicMessageBean> onUpdatedThread(DataBucketBean old_analytic_bucket, DataBucketBean new_analytic_bucket, Collection<AnalyticThreadJobBean> jobs, boolean is_enabled, Optional<BucketDiffBean> diff, IAnalyticsContext context) {
		return CompletableFuture.completedFuture(HadoopTechnologyUtils.validateJobs(new_analytic_bucket, jobs));
	}

	/* (non-Javadoc)
	 * @see com.ikanow.aleph2.data_model.interfaces.data_analytics.IAnalyticsTechnologyModule#onDeleteThread(com.ikanow.aleph2.data_model.objects.data_import.DataBucketBean, java.util.Collection, com.ikanow.aleph2.data_model.interfaces.data_analytics.IAnalyticsContext)
	 */
	@Override
	public CompletableFuture<BasicMessageBean> onDeleteThread(DataBucketBean to_delete_analytic_bucket, Collection<AnalyticThreadJobBean> jobs, IAnalyticsContext context) {
		// Nothing to do here
		return CompletableFuture.completedFuture(ErrorUtils.buildSuccessMessage(this, "onDeleteThread", ""));
	}
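	// The onNewThread/onUpdatedThread callbacks above just delegate to
	// HadoopTechnologyUtils.validateJobs. A hedged sketch of invoking that validation
	// directly, assuming the standard Aleph2 BeanTemplateUtils.build(...) builder
	// pattern (the bucket path below is made up):
	//   final DataBucketBean bucket = BeanTemplateUtils.build(DataBucketBean.class)
	//           .with(DataBucketBean::full_name, "/example/bucket")
	//           .done().get();
	//   final BasicMessageBean res = HadoopTechnologyUtils.validateJobs(bucket, Collections.emptyList());
	//   // res.success() then indicates whether the (empty) job list validated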
	/* (non-Javadoc)
	 * @see com.ikanow.aleph2.data_model.interfaces.data_analytics.IAnalyticsTechnologyModule#checkCustomTrigger(com.ikanow.aleph2.data_model.objects.data_import.DataBucketBean, com.ikanow.aleph2.data_model.objects.data_analytics.AnalyticThreadTriggerBean.AnalyticThreadComplexTriggerBean, com.ikanow.aleph2.data_model.interfaces.data_analytics.IAnalyticsContext)
	 */
	@Override
	public ManagementFuture<Boolean> checkCustomTrigger(DataBucketBean analytic_bucket, AnalyticThreadComplexTriggerBean trigger, IAnalyticsContext context) {
		// No custom triggers supported
		return FutureUtils.createManagementFuture(
				CompletableFuture.completedFuture(false),
				CompletableFuture.completedFuture(Arrays.asList(
						ErrorUtils.buildErrorMessage(this, "checkCustomTrigger", "No custom triggers supported"))));
	}

	/* (non-Javadoc)
	 * @see com.ikanow.aleph2.data_model.interfaces.data_analytics.IAnalyticsTechnologyModule#onThreadExecute(com.ikanow.aleph2.data_model.objects.data_import.DataBucketBean, java.util.Collection, java.util.Collection, com.ikanow.aleph2.data_model.interfaces.data_analytics.IAnalyticsContext)
	 */
	@Override
	public CompletableFuture<BasicMessageBean> onThreadExecute(DataBucketBean new_analytic_bucket, Collection<AnalyticThreadJobBean> jobs, Collection<AnalyticThreadComplexTriggerBean> matching_triggers, IAnalyticsContext context) {
		// Nothing to do here
		return CompletableFuture.completedFuture(ErrorUtils.buildSuccessMessage(this, "onThreadExecute", ""));
	}

	/* (non-Javadoc)
	 * @see com.ikanow.aleph2.data_model.interfaces.data_analytics.IAnalyticsTechnologyModule#onThreadComplete(com.ikanow.aleph2.data_model.objects.data_import.DataBucketBean, java.util.Collection, com.ikanow.aleph2.data_model.interfaces.data_analytics.IAnalyticsContext)
	 */
	@Override
	public CompletableFuture<BasicMessageBean> onThreadComplete(DataBucketBean completed_analytic_bucket, Collection<AnalyticThreadJobBean> jobs, IAnalyticsContext context) {
		// Nothing to do here
		return CompletableFuture.completedFuture(ErrorUtils.buildSuccessMessage(this, "onThreadComplete", ""));
	}

	/* (non-Javadoc)
	 * @see com.ikanow.aleph2.data_model.interfaces.data_analytics.IAnalyticsTechnologyModule#onPurge(com.ikanow.aleph2.data_model.objects.data_import.DataBucketBean, java.util.Collection, com.ikanow.aleph2.data_model.interfaces.data_analytics.IAnalyticsContext)
	 */
	@Override
	public CompletableFuture<BasicMessageBean> onPurge(DataBucketBean purged_analytic_bucket, Collection<AnalyticThreadJobBean> jobs, IAnalyticsContext context) {
		// Nothing to do here
		return CompletableFuture.completedFuture(ErrorUtils.buildSuccessMessage(this, "onPurge", ""));
	}

	/* (non-Javadoc)
	 * @see com.ikanow.aleph2.data_model.interfaces.data_analytics.IAnalyticsTechnologyModule#onPeriodicPoll(com.ikanow.aleph2.data_model.objects.data_import.DataBucketBean, java.util.Collection, com.ikanow.aleph2.data_model.interfaces.data_analytics.IAnalyticsContext)
	 */
	@Override
	public CompletableFuture<BasicMessageBean> onPeriodicPoll(DataBucketBean polled_analytic_bucket, Collection<AnalyticThreadJobBean> jobs, IAnalyticsContext context) {
		// Nothing to do here
		return CompletableFuture.completedFuture(ErrorUtils.buildSuccessMessage(this, "onPeriodicPoll", ""));
	}

	/* (non-Javadoc)
	 * @see com.ikanow.aleph2.data_model.interfaces.data_analytics.IAnalyticsTechnologyModule#onTestThread(com.ikanow.aleph2.data_model.objects.data_import.DataBucketBean, java.util.Collection, com.ikanow.aleph2.data_model.objects.shared.ProcessingTestSpecBean, com.ikanow.aleph2.data_model.interfaces.data_analytics.IAnalyticsContext)
	 */
	@Override
	public CompletableFuture<BasicMessageBean> onTestThread(DataBucketBean test_bucket, Collection<AnalyticThreadJobBean> jobs, ProcessingTestSpecBean test_spec, IAnalyticsContext context) {
		return CompletableFuture.completedFuture(HadoopTechnologyUtils.validateJobs(test_bucket, jobs));
	}
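	// checkCustomTrigger (and checkAnalyticJobProgress below) return a ManagementFuture:
	// a CompletableFuture for the primary result plus a side channel of BasicMessageBeans.
	// A hedged sketch of how a caller might consume one, assuming the
	// getManagementResults() accessor provided by Aleph2's FutureUtils:
	//   final ManagementFuture<Boolean> f = tech.checkCustomTrigger(bucket, trigger, context);
	//   final Boolean fired = f.get(); // primary result (always false here)
	//   f.getManagementResults().get()
	//    .forEach(msg -> System.out.println(msg.command() + ": " + msg.message()));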
	/* (non-Javadoc)
	 * @see com.ikanow.aleph2.data_model.interfaces.data_analytics.IAnalyticsTechnologyModule#startAnalyticJob(com.ikanow.aleph2.data_model.objects.data_import.DataBucketBean, java.util.Collection, com.ikanow.aleph2.data_model.objects.data_analytics.AnalyticThreadJobBean, com.ikanow.aleph2.data_model.interfaces.data_analytics.IAnalyticsContext)
	 */
	@Override
	public CompletableFuture<BasicMessageBean> startAnalyticJob(DataBucketBean analytic_bucket, Collection<AnalyticThreadJobBean> jobs, AnalyticThreadJobBean job_to_start, IAnalyticsContext context) {
		return CompletableFuture.completedFuture(
				startAnalyticJobOrTest(analytic_bucket, jobs, job_to_start, context, Optional.empty()).validation(
						fail -> ErrorUtils.buildErrorMessage(this.getClass().getName(), "startAnalyticJob", fail),
						success -> ErrorUtils.buildSuccessMessage(this.getClass().getName(), "startAnalyticJob", success.getJobID().toString())));
	}

	/** Starts the specified analytic job - or a test of it, if a test spec is provided
	 * @param analytic_bucket - the bucket containing the job
	 * @param jobs - all the analytic jobs in the bucket's thread
	 * @param job_to_start - the job to launch
	 * @param context - the analytic context passed in by the core
	 * @param test_spec - if present, the job is run as a test against this spec
	 * @return a validation containing either an error string or the launched Hadoop job
	 */
	public Validation<String, Job> startAnalyticJobOrTest(DataBucketBean analytic_bucket, Collection<AnalyticThreadJobBean> jobs, AnalyticThreadJobBean job_to_start, IAnalyticsContext context, Optional<ProcessingTestSpecBean> test_spec) {
		//TODO (ALEPH-12): check if it's actually a batch enrichment first

		final BatchEnrichmentContext wrapped_context = new BatchEnrichmentContext(context);
		wrapped_context.setJob(job_to_start);

		// Create a pretend bucket that has this job as the (sole) enrichment topology...
		final DataBucketBean converted_bucket = (null != analytic_bucket.batch_enrichment_configs())
				? analytic_bucket
				: BeanTemplateUtils.clone(analytic_bucket)
						.with(DataBucketBean::master_enrichment_type, DataBucketBean.MasterEnrichmentType.batch)
						.with(DataBucketBean::batch_enrichment_configs, HadoopTechnologyUtils.convertAnalyticJob(job_to_start.name(), job_to_start.config()))
						.done();
		wrapped_context.setBucket(converted_bucket);

		final BeJobLauncher beJobService = new BeJobLauncher(wrapped_context.getServiceContext().getGlobalProperties(), wrapped_context);
		final Validation<String, Job> result = beJobService.runEnhancementJob(converted_bucket, test_spec);
		return result;
	}
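	// stopAnalyticJob/checkAnalyticJobProgress below locate the running Hadoop job via the
	// unique signature Aleph2 derives from the bucket path and job name - the same name
	// the job must have been submitted under. A hedged sketch (the values are made up):
	//   final String job_name = BucketUtils.getUniqueSignature("/example/bucket", Optional.of("job1"));
	//   // matched against JobStatus.getJobName() when scanning cluster.getAllJobStatuses()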
	/* (non-Javadoc)
	 * @see com.ikanow.aleph2.data_model.interfaces.data_analytics.IAnalyticsTechnologyModule#stopAnalyticJob(com.ikanow.aleph2.data_model.objects.data_import.DataBucketBean, java.util.Collection, com.ikanow.aleph2.data_model.objects.data_analytics.AnalyticThreadJobBean, com.ikanow.aleph2.data_model.interfaces.data_analytics.IAnalyticsContext)
	 */
	@Override
	public CompletableFuture<BasicMessageBean> stopAnalyticJob(DataBucketBean analytic_bucket, Collection<AnalyticThreadJobBean> jobs, AnalyticThreadJobBean job_to_stop, IAnalyticsContext context) {
		try {
			final Cluster cluster = new Cluster(_config.get());
			final String job_name = BucketUtils.getUniqueSignature(analytic_bucket.full_name(), Optional.ofNullable(job_to_stop.name()));
			return Arrays.stream(cluster.getAllJobStatuses())
					.filter(job_status -> job_status.getJobName().equals(job_name))
					.findFirst()
					.map(Lambdas.wrap_u(job_status -> {
						final Job job = cluster.getJob(job_status.getJobID());
						job.killJob();
						return CompletableFuture.completedFuture(
								ErrorUtils.buildSuccessMessage(this.getClass().getSimpleName(), "stopAnalyticJob", analytic_bucket.full_name() + ":" + job_to_stop.name()));
					}))
					.get() // (will throw if not found, falling through to the catch below)
					;
		}
		catch (Throwable t) {
			return CompletableFuture.completedFuture(
					ErrorUtils.buildErrorMessage(this.getClass().getSimpleName(), "stopAnalyticJob", HadoopErrorUtils.JOB_STOP_ERROR, job_to_stop.name(), analytic_bucket.full_name()));
		}
	}

	/* (non-Javadoc)
	 * @see com.ikanow.aleph2.data_model.interfaces.data_analytics.IAnalyticsTechnologyModule#resumeAnalyticJob(com.ikanow.aleph2.data_model.objects.data_import.DataBucketBean, java.util.Collection, com.ikanow.aleph2.data_model.objects.data_analytics.AnalyticThreadJobBean, com.ikanow.aleph2.data_model.interfaces.data_analytics.IAnalyticsContext)
	 */
	@Override
	public CompletableFuture<BasicMessageBean> resumeAnalyticJob(DataBucketBean analytic_bucket, Collection<AnalyticThreadJobBean> jobs, AnalyticThreadJobBean job_to_resume, IAnalyticsContext context) {
		// (no specific resume function, just use start)
		return startAnalyticJob(analytic_bucket, jobs, job_to_resume, context);
	}

	/* (non-Javadoc)
	 * @see com.ikanow.aleph2.data_model.interfaces.data_analytics.IAnalyticsTechnologyModule#suspendAnalyticJob(com.ikanow.aleph2.data_model.objects.data_import.DataBucketBean, java.util.Collection, com.ikanow.aleph2.data_model.objects.data_analytics.AnalyticThreadJobBean, com.ikanow.aleph2.data_model.interfaces.data_analytics.IAnalyticsContext)
	 */
	@Override
	public CompletableFuture<BasicMessageBean> suspendAnalyticJob(DataBucketBean analytic_bucket, Collection<AnalyticThreadJobBean> jobs, AnalyticThreadJobBean job_to_suspend, IAnalyticsContext context) {
		// (no specific suspend function, just use stop)
		return stopAnalyticJob(analytic_bucket, jobs, job_to_suspend, context);
	}

	/* (non-Javadoc)
	 * @see com.ikanow.aleph2.data_model.interfaces.data_analytics.IAnalyticsTechnologyModule#startAnalyticJobTest(com.ikanow.aleph2.data_model.objects.data_import.DataBucketBean, java.util.Collection, com.ikanow.aleph2.data_model.objects.data_analytics.AnalyticThreadJobBean, com.ikanow.aleph2.data_model.objects.shared.ProcessingTestSpecBean, com.ikanow.aleph2.data_model.interfaces.data_analytics.IAnalyticsContext)
	 */
	@Override
	public CompletableFuture<BasicMessageBean> startAnalyticJobTest(DataBucketBean analytic_bucket, Collection<AnalyticThreadJobBean> jobs, AnalyticThreadJobBean job_to_test, ProcessingTestSpecBean test_spec, IAnalyticsContext context) {
		return CompletableFuture.completedFuture(
				startAnalyticJobOrTest(analytic_bucket, jobs, job_to_test, context, Optional.of(test_spec)).validation(
						fail -> ErrorUtils.buildErrorMessage(this.getClass().getName(), "startAnalyticJobTest", fail),
						success -> ErrorUtils.buildSuccessMessage(this.getClass().getName(), "startAnalyticJobTest", success.getJobID().toString())));
	}

	/* (non-Javadoc)
	 * @see com.ikanow.aleph2.data_model.interfaces.data_analytics.IAnalyticsTechnologyModule#checkAnalyticJobProgress(com.ikanow.aleph2.data_model.objects.data_import.DataBucketBean, java.util.Collection, com.ikanow.aleph2.data_model.objects.data_analytics.AnalyticThreadJobBean, com.ikanow.aleph2.data_model.interfaces.data_analytics.IAnalyticsContext)
	 */
	@Override
	public ManagementFuture<Boolean> checkAnalyticJobProgress(DataBucketBean analytic_bucket, Collection<AnalyticThreadJobBean> jobs, AnalyticThreadJobBean job_to_check, IAnalyticsContext context) {
		try {
			final Cluster cluster = new Cluster(_config.get());
			final String job_name = BucketUtils.getUniqueSignature(analytic_bucket.full_name(), Optional.ofNullable(job_to_check.name()));
			return Arrays.stream(cluster.getAllJobStatuses())
					.filter(job_status -> job_status.getJobName().equals(job_name))
					.findFirst()
					.map(job_status -> {
						//TODO (ALEPH-12): create useful info in the side channel beans ... eg if it's an error?
						// (need to get the job first, then get more info)
						return FutureUtils.createManagementFuture(
								CompletableFuture.completedFuture(job_status.isJobComplete()),
								CompletableFuture.completedFuture(
										Arrays.asList(ErrorUtils.buildMessage(true, this.getClass().getSimpleName(), "checkAnalyticJobProgress", "TBD: more status"))));
					})
					.get() // (will throw if not found, falling through to the catch below)
					;
		}
		catch (Throwable t) {
			return FutureUtils.createManagementFuture(CompletableFuture.completedFuture(true));
		}
	}

	/* (non-Javadoc)
	 * @see com.ikanow.aleph2.data_model.interfaces.shared_services.IUnderlyingService#getUnderlyingArtefacts()
	 */
	@Override
	public Collection<Object> getUnderlyingArtefacts() {
		return Arrays.asList(this);
	}

	/* (non-Javadoc)
	 * @see com.ikanow.aleph2.data_model.interfaces.shared_services.IUnderlyingService#getUnderlyingPlatformDriver(java.lang.Class, java.util.Optional)
	 */
	@Override
	public <T> Optional<T> getUnderlyingPlatformDriver(Class<T> driver_class, Optional<String> driver_options) {
		return Optional.empty();
	}
}
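To tie the lifecycle together, here is a minimal sketch of a harness that drives the service through a start/check/stop cycle. It is an illustration only: the IAnalyticsContext, bucket, and job values are assumed to be supplied by the surrounding Aleph2 framework, and the class name HadoopTechnologyServiceExample is made up for this tutorial.

import java.util.Collection;
import java.util.concurrent.CompletableFuture;

import com.ikanow.aleph2.analytics.hadoop.services.HadoopTechnologyService;
import com.ikanow.aleph2.data_model.interfaces.data_analytics.IAnalyticsContext;
import com.ikanow.aleph2.data_model.objects.data_analytics.AnalyticThreadJobBean;
import com.ikanow.aleph2.data_model.objects.data_import.DataBucketBean;
import com.ikanow.aleph2.data_model.objects.shared.BasicMessageBean;

public class HadoopTechnologyServiceExample {

	/** Drives HadoopTechnologyService through a start/check/stop cycle for one job. */
	public static void runOneJob(IAnalyticsContext context, DataBucketBean bucket,
			Collection<AnalyticThreadJobBean> jobs, AnalyticThreadJobBean job) throws Exception {

		final HadoopTechnologyService tech = new HadoopTechnologyService();

		// Loads the Hadoop Configuration from the node's YARN config directory (quietly no-ops on failure)
		tech.onInit(context);

		// Cheap node-affinity check: is core-site.xml present on this node?
		if (!tech.canRunOnThisNode(bucket, jobs, context)) {
			System.out.println("Hadoop is not installed on this node - skipping");
			return;
		}

		// Submit the job and block on the submission result
		final CompletableFuture<BasicMessageBean> started = tech.startAnalyticJob(bucket, jobs, job, context);
		System.out.println("start success=" + started.get().success() + ": " + started.get().message());

		// Poll completion via the management future's primary result
		final boolean complete = tech.checkAnalyticJobProgress(bucket, jobs, job, context).get();
		System.out.println("complete=" + complete);

		// Kill the job (suspendAnalyticJob delegates to the same path)
		final BasicMessageBean stopped = tech.stopAnalyticJob(bucket, jobs, job, context).get();
		System.out.println("stop success=" + stopped.success());
	}
}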