com.ikanow.aleph2.analytics.hadoop.services.HadoopTechnologyService.java Source code

Here is the source code for com.ikanow.aleph2.analytics.hadoop.services.HadoopTechnologyService.java

Source

/*******************************************************************************
 * Copyright 2015, The IKANOW Open Source Project.
 * 
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *   http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 ******************************************************************************/
package com.ikanow.aleph2.analytics.hadoop.services;

import java.io.File;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.Optional;
import java.util.concurrent.CompletableFuture;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Cluster;
import org.apache.hadoop.mapreduce.Job;

import com.google.inject.Module;
import com.ikanow.aleph2.analytics.hadoop.utils.HadoopTechnologyUtils;
import com.ikanow.aleph2.analytics.hadoop.utils.HadoopErrorUtils;
import com.ikanow.aleph2.data_model.interfaces.data_analytics.IAnalyticsContext;
import com.ikanow.aleph2.data_model.interfaces.data_analytics.IAnalyticsTechnologyService;
import com.ikanow.aleph2.data_model.interfaces.shared_services.IExtraDependencyLoader;
import com.ikanow.aleph2.data_model.objects.data_analytics.AnalyticThreadJobBean;
import com.ikanow.aleph2.data_model.objects.data_analytics.AnalyticThreadTriggerBean.AnalyticThreadComplexTriggerBean;
import com.ikanow.aleph2.data_model.objects.data_import.BucketDiffBean;
import com.ikanow.aleph2.data_model.objects.data_import.DataBucketBean;
import com.ikanow.aleph2.data_model.objects.shared.BasicMessageBean;
import com.ikanow.aleph2.data_model.objects.shared.ProcessingTestSpecBean;
import com.ikanow.aleph2.data_model.utils.BeanTemplateUtils;
import com.ikanow.aleph2.data_model.utils.BucketUtils;
import com.ikanow.aleph2.data_model.utils.ErrorUtils;
import com.ikanow.aleph2.data_model.utils.FutureUtils;
import com.ikanow.aleph2.data_model.utils.FutureUtils.ManagementFuture;
import com.ikanow.aleph2.data_model.utils.Lambdas;
import com.ikanow.aleph2.data_model.utils.SetOnce;

import fj.data.Validation;

/** Hadoop analytic technology module - provides the interface between Hadoop and Aleph2
 * @author Alex
 */
public class HadoopTechnologyService implements IAnalyticsTechnologyService, IExtraDependencyLoader {

    protected SetOnce<Configuration> _config = new SetOnce<>();

    /** This service needs to load some additional classes via Guice. Here's the module list that defines the bindings
     * @return the list of Guice modules required by this service (currently empty)
     */
    public static List<Module> getExtraDependencyModules() {
        return Collections.emptyList();
    }

    @Override
    public void youNeedToImplementTheStaticFunctionCalled_getExtraDependencyModules() {
        // (done - see the static getExtraDependencyModules above)
    }
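
    // For illustration only (not part of the original file): if this technology did
    // need Guice bindings, getExtraDependencyModules above might return a module along
    // these lines - the ISomeHelper/SomeHelperImpl names are hypothetical:
    //
    //   public static List<Module> getExtraDependencyModules() {
    //       return Arrays.asList(new com.google.inject.AbstractModule() {
    //           @Override
    //           protected void configure() {
    //               bind(ISomeHelper.class).to(SomeHelperImpl.class);
    //           }
    //       });
    //   }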

    @Override
    public void onInit(IAnalyticsContext context) {
        try {
            if (!_config.isSet()) {
                _config.trySet(
                        HadoopTechnologyUtils.getHadoopConfig(context.getServiceContext().getGlobalProperties()));
            }
        } catch (Throwable t) {
            // (swallow the error: _config stays unset, so later Hadoop calls fail and are reported by their own catch blocks)
        }
    }
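
    // Note (added for clarity, not in the original file): SetOnce.trySet() only succeeds
    // the first time, so repeated onInit calls are harmless - the Hadoop Configuration is
    // built once and then reused by stopAnalyticJob/checkAnalyticJobProgress below.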

    /* (non-Javadoc)
     * @see com.ikanow.aleph2.data_model.interfaces.data_analytics.IAnalyticsTechnologyModule#canRunOnThisNode(com.ikanow.aleph2.data_model.objects.data_import.DataBucketBean, java.util.Collection, com.ikanow.aleph2.data_model.interfaces.data_analytics.IAnalyticsContext)
     */
    @Override
    public boolean canRunOnThisNode(DataBucketBean analytic_bucket, Collection<AnalyticThreadJobBean> jobs,
            IAnalyticsContext context) {

        // Simple check for whether someone has made a token effort to install Hadoop: look for core-site.xml in the local YARN config directory

        try {
            File hadoop_installed = new File(
                    context.getServiceContext().getGlobalProperties().local_yarn_config_dir() + File.separator
                            + "core-site.xml");
            return hadoop_installed.exists();
        } catch (Throwable t) {
            return false;
        }
    }

    /* (non-Javadoc)
     * @see com.ikanow.aleph2.data_model.interfaces.data_analytics.IAnalyticsTechnologyModule#onNewThread(com.ikanow.aleph2.data_model.objects.data_import.DataBucketBean, java.util.Collection, com.ikanow.aleph2.data_model.interfaces.data_analytics.IAnalyticsContext, boolean)
     */
    @Override
    public CompletableFuture<BasicMessageBean> onNewThread(DataBucketBean new_analytic_bucket,
            Collection<AnalyticThreadJobBean> jobs, IAnalyticsContext context, boolean enabled) {
        return CompletableFuture.completedFuture(HadoopTechnologyUtils.validateJobs(new_analytic_bucket, jobs));
    }

    /* (non-Javadoc)
     * @see com.ikanow.aleph2.data_model.interfaces.data_analytics.IAnalyticsTechnologyModule#onUpdatedThread(com.ikanow.aleph2.data_model.objects.data_import.DataBucketBean, com.ikanow.aleph2.data_model.objects.data_import.DataBucketBean, java.util.Collection, boolean, java.util.Optional, com.ikanow.aleph2.data_model.interfaces.data_analytics.IAnalyticsContext)
     */
    @Override
    public CompletableFuture<BasicMessageBean> onUpdatedThread(DataBucketBean old_analytic_bucket,
            DataBucketBean new_analytic_bucket, Collection<AnalyticThreadJobBean> jobs, boolean is_enabled,
            Optional<BucketDiffBean> diff, IAnalyticsContext context) {
        return CompletableFuture.completedFuture(HadoopTechnologyUtils.validateJobs(new_analytic_bucket, jobs));
    }

    /* (non-Javadoc)
     * @see com.ikanow.aleph2.data_model.interfaces.data_analytics.IAnalyticsTechnologyModule#onDeleteThread(com.ikanow.aleph2.data_model.objects.data_import.DataBucketBean, java.util.Collection, com.ikanow.aleph2.data_model.interfaces.data_analytics.IAnalyticsContext)
     */
    @Override
    public CompletableFuture<BasicMessageBean> onDeleteThread(DataBucketBean to_delete_analytic_bucket,
            Collection<AnalyticThreadJobBean> jobs, IAnalyticsContext context) {
        // Nothing to do here
        return CompletableFuture.completedFuture(ErrorUtils.buildSuccessMessage(this, "onDeleteThread", ""));
    }

    /* (non-Javadoc)
     * @see com.ikanow.aleph2.data_model.interfaces.data_analytics.IAnalyticsTechnologyModule#checkCustomTrigger(com.ikanow.aleph2.data_model.objects.data_import.DataBucketBean, com.ikanow.aleph2.data_model.objects.data_analytics.AnalyticThreadTriggerBean.AnalyticThreadComplexTriggerBean, com.ikanow.aleph2.data_model.interfaces.data_analytics.IAnalyticsContext)
     */
    @Override
    public ManagementFuture<Boolean> checkCustomTrigger(DataBucketBean analytic_bucket,
            AnalyticThreadComplexTriggerBean trigger, IAnalyticsContext context) {
        // No custom triggers supported
        return FutureUtils.createManagementFuture(CompletableFuture.completedFuture(false),
                CompletableFuture.completedFuture(Arrays.asList(
                        ErrorUtils.buildErrorMessage(this, "checkCustomTrigger", "No custom triggers supported"))));
    }

    /* (non-Javadoc)
     * @see com.ikanow.aleph2.data_model.interfaces.data_analytics.IAnalyticsTechnologyModule#onThreadExecute(com.ikanow.aleph2.data_model.objects.data_import.DataBucketBean, java.util.Collection, java.util.Collection, com.ikanow.aleph2.data_model.interfaces.data_analytics.IAnalyticsContext)
     */
    @Override
    public CompletableFuture<BasicMessageBean> onThreadExecute(DataBucketBean new_analytic_bucket,
            Collection<AnalyticThreadJobBean> jobs, Collection<AnalyticThreadComplexTriggerBean> matching_triggers,
            IAnalyticsContext context) {
        // Nothing to do here
        return CompletableFuture.completedFuture(ErrorUtils.buildSuccessMessage(this, "onThreadExecute", ""));
    }

    /* (non-Javadoc)
     * @see com.ikanow.aleph2.data_model.interfaces.data_analytics.IAnalyticsTechnologyModule#onThreadComplete(com.ikanow.aleph2.data_model.objects.data_import.DataBucketBean, java.util.Collection, com.ikanow.aleph2.data_model.interfaces.data_analytics.IAnalyticsContext)
     */
    @Override
    public CompletableFuture<BasicMessageBean> onThreadComplete(DataBucketBean completed_analytic_bucket,
            Collection<AnalyticThreadJobBean> jobs, IAnalyticsContext context) {
        // Nothing to do here
        return CompletableFuture.completedFuture(ErrorUtils.buildSuccessMessage(this, "onThreadComplete", ""));
    }

    /* (non-Javadoc)
     * @see com.ikanow.aleph2.data_model.interfaces.data_analytics.IAnalyticsTechnologyModule#onPurge(com.ikanow.aleph2.data_model.objects.data_import.DataBucketBean, java.util.Collection, com.ikanow.aleph2.data_model.interfaces.data_analytics.IAnalyticsContext)
     */
    @Override
    public CompletableFuture<BasicMessageBean> onPurge(DataBucketBean purged_analytic_bucket,
            Collection<AnalyticThreadJobBean> jobs, IAnalyticsContext context) {
        // Nothing to do here
        return CompletableFuture.completedFuture(ErrorUtils.buildSuccessMessage(this, "onPurge", ""));
    }

    /* (non-Javadoc)
     * @see com.ikanow.aleph2.data_model.interfaces.data_analytics.IAnalyticsTechnologyModule#onPeriodicPoll(com.ikanow.aleph2.data_model.objects.data_import.DataBucketBean, java.util.Collection, com.ikanow.aleph2.data_model.interfaces.data_analytics.IAnalyticsContext)
     */
    @Override
    public CompletableFuture<BasicMessageBean> onPeriodicPoll(DataBucketBean polled_analytic_bucket,
            Collection<AnalyticThreadJobBean> jobs, IAnalyticsContext context) {
        // Nothing to do here
        return CompletableFuture.completedFuture(ErrorUtils.buildSuccessMessage(this, "onPeriodicPoll", ""));
    }

    /* (non-Javadoc)
     * @see com.ikanow.aleph2.data_model.interfaces.data_analytics.IAnalyticsTechnologyModule#onTestThread(com.ikanow.aleph2.data_model.objects.data_import.DataBucketBean, java.util.Collection, com.ikanow.aleph2.data_model.objects.shared.ProcessingTestSpecBean, com.ikanow.aleph2.data_model.interfaces.data_analytics.IAnalyticsContext)
     */
    @Override
    public CompletableFuture<BasicMessageBean> onTestThread(DataBucketBean test_bucket,
            Collection<AnalyticThreadJobBean> jobs, ProcessingTestSpecBean test_spec, IAnalyticsContext context) {
        return CompletableFuture.completedFuture(HadoopTechnologyUtils.validateJobs(test_bucket, jobs));
    }

    /* (non-Javadoc)
     * @see com.ikanow.aleph2.data_model.interfaces.data_analytics.IAnalyticsTechnologyModule#startAnalyticJob(com.ikanow.aleph2.data_model.objects.data_import.DataBucketBean, java.util.Collection, com.ikanow.aleph2.data_model.objects.data_analytics.AnalyticThreadJobBean, com.ikanow.aleph2.data_model.interfaces.data_analytics.IAnalyticsContext)
     */
    @Override
    public CompletableFuture<BasicMessageBean> startAnalyticJob(DataBucketBean analytic_bucket,
            Collection<AnalyticThreadJobBean> jobs, AnalyticThreadJobBean job_to_start, IAnalyticsContext context) {
        return CompletableFuture.completedFuture(
                startAnalyticJobOrTest(analytic_bucket, jobs, job_to_start, context, Optional.empty()).validation(
                        fail -> ErrorUtils.buildErrorMessage(this.getClass().getName(), "startAnalyticJob", fail),
                        success -> ErrorUtils.buildSuccessMessage(this.getClass().getName(), "startAnalyticJob",
                                success.getJobID().toString())));
    }
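
    // Note (illustrative, not in the original file): fj.data.Validation<String, Job>
    // carries either an error string or a live Hadoop Job; the .validation(failFn, successFn)
    // call above folds it into a single BasicMessageBean, e.g.:
    //
    //   Validation<String, Job> v = Validation.fail("no such job");
    //   BasicMessageBean msg = v.validation(
    //           fail -> ErrorUtils.buildErrorMessage("Caller", "start", fail),
    //           success -> ErrorUtils.buildSuccessMessage("Caller", "start", success.getJobID().toString()));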

    /** Starts the given analytic job, either for real or as a test
     * @param analytic_bucket the bucket whose analytic thread contains the job
     * @param jobs all the jobs in the analytic thread
     * @param job_to_start the job to launch
     * @param context the analytics context
     * @param test_spec the test specification, if this is a test run
     * @return either an error string or the launched Hadoop Job
     */
    public Validation<String, Job> startAnalyticJobOrTest(DataBucketBean analytic_bucket,
            Collection<AnalyticThreadJobBean> jobs, AnalyticThreadJobBean job_to_start, IAnalyticsContext context,
            Optional<ProcessingTestSpecBean> test_spec) {
        //TODO (ALEPH-12): check if it's actually a batch enrichment first

        final BatchEnrichmentContext wrapped_context = new BatchEnrichmentContext(context);
        wrapped_context.setJob(job_to_start);

        // Create a pretend bucket that has this job as the (sole) enrichment topology...
        final DataBucketBean converted_bucket = (null != analytic_bucket.batch_enrichment_configs())
                ? analytic_bucket
                : BeanTemplateUtils.clone(analytic_bucket)
                        .with(DataBucketBean::master_enrichment_type, DataBucketBean.MasterEnrichmentType.batch)
                        .with(DataBucketBean::batch_enrichment_configs, HadoopTechnologyUtils
                                .convertAnalyticJob(job_to_start.name(), job_to_start.config()))
                        .done();

        wrapped_context.setBucket(converted_bucket);

        final BeJobLauncher beJobService = new BeJobLauncher(
                wrapped_context.getServiceContext().getGlobalProperties(), wrapped_context);
        final Validation<String, Job> result = beJobService.runEnhancementJob(converted_bucket, test_spec);

        return result;
    }
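
    // Note (added for clarity, not in the original file): BeanTemplateUtils.clone(bean)
    // .with(...).done() builds a modified immutable copy of the bean, so the original
    // analytic_bucket is left untouched while the wrapped context sees a bucket whose
    // sole batch enrichment topology is the analytic job being started.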

    /* (non-Javadoc)
     * @see com.ikanow.aleph2.data_model.interfaces.data_analytics.IAnalyticsTechnologyModule#stopAnalyticJob(com.ikanow.aleph2.data_model.objects.data_import.DataBucketBean, java.util.Collection, com.ikanow.aleph2.data_model.objects.data_analytics.AnalyticThreadJobBean, com.ikanow.aleph2.data_model.interfaces.data_analytics.IAnalyticsContext)
     */
    @Override
    public CompletableFuture<BasicMessageBean> stopAnalyticJob(DataBucketBean analytic_bucket,
            Collection<AnalyticThreadJobBean> jobs, AnalyticThreadJobBean job_to_stop, IAnalyticsContext context) {
        try {
            final Cluster cluster = new Cluster(_config.get());
            final String job_name = BucketUtils.getUniqueSignature(analytic_bucket.full_name(),
                    Optional.ofNullable(job_to_stop.name()));
            return Arrays.stream(cluster.getAllJobStatuses())
                    .filter(job_status -> job_status.getJobName().equals(job_name)).findFirst()
                    .map(Lambdas.wrap_u(job_status -> {
                        final Job job = cluster.getJob(job_status.getJobID());
                        job.killJob();
                        return CompletableFuture
                                .completedFuture(ErrorUtils.buildSuccessMessage(this.getClass().getSimpleName(),
                                        "stopAnalyticJob", analytic_bucket.full_name() + ":" + job_to_stop.name()));
                    })).get(); // (will throw if the job isn't found, falling through to the catch below)
        } catch (Throwable t) {
            // (job not found, or the kill failed - report an error)
            return CompletableFuture.completedFuture(
                    ErrorUtils.buildErrorMessage(this.getClass().getSimpleName(), "stopAnalyticJob",
                            HadoopErrorUtils.JOB_STOP_ERROR, job_to_stop.name(), analytic_bucket.full_name()));
        }
    }

    /* (non-Javadoc)
     * @see com.ikanow.aleph2.data_model.interfaces.data_analytics.IAnalyticsTechnologyModule#resumeAnalyticJob(com.ikanow.aleph2.data_model.objects.data_import.DataBucketBean, java.util.Collection, com.ikanow.aleph2.data_model.objects.data_analytics.AnalyticThreadJobBean, com.ikanow.aleph2.data_model.interfaces.data_analytics.IAnalyticsContext)
     */
    @Override
    public CompletableFuture<BasicMessageBean> resumeAnalyticJob(DataBucketBean analytic_bucket,
            Collection<AnalyticThreadJobBean> jobs, AnalyticThreadJobBean job_to_resume,
            IAnalyticsContext context) {
        // (no specific resume function, just use start)
        return startAnalyticJob(analytic_bucket, jobs, job_to_resume, context);
    }

    /* (non-Javadoc)
     * @see com.ikanow.aleph2.data_model.interfaces.data_analytics.IAnalyticsTechnologyModule#suspendAnalyticJob(com.ikanow.aleph2.data_model.objects.data_import.DataBucketBean, java.util.Collection, com.ikanow.aleph2.data_model.objects.data_analytics.AnalyticThreadJobBean, com.ikanow.aleph2.data_model.interfaces.data_analytics.IAnalyticsContext)
     */
    @Override
    public CompletableFuture<BasicMessageBean> suspendAnalyticJob(DataBucketBean analytic_bucket,
            Collection<AnalyticThreadJobBean> jobs, AnalyticThreadJobBean job_to_suspend,
            IAnalyticsContext context) {
        // (no specific suspend function, just use stop)
        return stopAnalyticJob(analytic_bucket, jobs, job_to_suspend, context);
    }

    /* (non-Javadoc)
     * @see com.ikanow.aleph2.data_model.interfaces.data_analytics.IAnalyticsTechnologyModule#startAnalyticJobTest(com.ikanow.aleph2.data_model.objects.data_import.DataBucketBean, java.util.Collection, com.ikanow.aleph2.data_model.objects.data_analytics.AnalyticThreadJobBean, com.ikanow.aleph2.data_model.objects.shared.ProcessingTestSpecBean, com.ikanow.aleph2.data_model.interfaces.data_analytics.IAnalyticsContext)
     */
    @Override
    public CompletableFuture<BasicMessageBean> startAnalyticJobTest(DataBucketBean analytic_bucket,
            Collection<AnalyticThreadJobBean> jobs, AnalyticThreadJobBean job_to_test,
            ProcessingTestSpecBean test_spec, IAnalyticsContext context) {
        return CompletableFuture.completedFuture(
                startAnalyticJobOrTest(analytic_bucket, jobs, job_to_test, context, Optional.of(test_spec))
                        .validation(
                                fail -> ErrorUtils.buildErrorMessage(this.getClass().getName(),
                                        "startAnalyticJobTest", fail),
                                success -> ErrorUtils.buildSuccessMessage(this.getClass().getName(),
                                        "startAnalyticJobTest", success.getJobID().toString())));
    }

    /* (non-Javadoc)
     * @see com.ikanow.aleph2.data_model.interfaces.data_analytics.IAnalyticsTechnologyModule#checkAnalyticJobProgress(com.ikanow.aleph2.data_model.objects.data_import.DataBucketBean, java.util.Collection, com.ikanow.aleph2.data_model.objects.data_analytics.AnalyticThreadJobBean, com.ikanow.aleph2.data_model.interfaces.data_analytics.IAnalyticsContext)
     */
    @Override
    public ManagementFuture<Boolean> checkAnalyticJobProgress(DataBucketBean analytic_bucket,
            Collection<AnalyticThreadJobBean> jobs, AnalyticThreadJobBean job_to_check, IAnalyticsContext context) {

        try {
            final Cluster cluster = new Cluster(_config.get());
            final String job_name = BucketUtils.getUniqueSignature(analytic_bucket.full_name(),
                    Optional.ofNullable(job_to_check.name()));
            return Arrays.stream(cluster.getAllJobStatuses())
                    .filter(job_status -> job_status.getJobName().equals(job_name)).findFirst().map(job_status -> {
                        //TODO (ALEPH-12): create useful info in the side channel beans ... eg if it's an error?
                        // (need to get the job first, then get more info)
                        return FutureUtils.createManagementFuture(
                                CompletableFuture.completedFuture(job_status.isJobComplete()),
                                CompletableFuture.completedFuture(
                                        Arrays.asList(ErrorUtils.buildMessage(true, this.getClass().getSimpleName(),
                                                "checkAnalyticJobProgress", "TBD: more status"))));
                    }).get(); // (will throw if the job isn't found, falling through to the catch below)
        } catch (Throwable t) {
            // (job not present on the cluster - treat it as complete, with no side channel info)
            return FutureUtils.createManagementFuture(CompletableFuture.completedFuture(true));
        }
    }

    /* (non-Javadoc)
     * @see com.ikanow.aleph2.data_model.interfaces.shared_services.IUnderlyingService#getUnderlyingArtefacts()
     */
    @Override
    public Collection<Object> getUnderlyingArtefacts() {
        return Arrays.asList(this);
    }

    /* (non-Javadoc)
     * @see com.ikanow.aleph2.data_model.interfaces.shared_services.IUnderlyingService#getUnderlyingPlatformDriver(java.lang.Class, java.util.Optional)
     */
    @Override
    public <T> Optional<T> getUnderlyingPlatformDriver(Class<T> driver_class, Optional<String> driver_options) {
        return Optional.empty();
    }

}
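
As an aside, here is a minimal, self-contained sketch of the job-lookup pattern that stopAnalyticJob and checkAnalyticJobProgress share: build a Cluster from the configuration, scan all job statuses for the bucket's unique job signature, and act on the first match. (The class name JobLookupSketch and the helper findJobStatus are invented for this illustration; they are not part of the original file.)

import java.util.Arrays;
import java.util.Optional;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Cluster;
import org.apache.hadoop.mapreduce.JobStatus;

public class JobLookupSketch {

    /** Returns the status of the first cluster job whose name matches the given signature, if any */
    public static Optional<JobStatus> findJobStatus(Configuration config, String job_name) throws Exception {
        final Cluster cluster = new Cluster(config); // connects using the supplied Hadoop configuration
        return Arrays.stream(cluster.getAllJobStatuses()) // all jobs known to the cluster
                .filter(job_status -> job_status.getJobName().equals(job_name)) // match on the unique signature
                .findFirst();
    }
}

(In the service above, the signature comes from BucketUtils.getUniqueSignature(bucket.full_name(), Optional.ofNullable(job.name())), which is what makes the name-based lookup safe.)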