// Java tutorial: launching an external harvester process against an Aleph2 context
/******************************************************************************* * Copyright 2015, The IKANOW Open Source Project. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. *******************************************************************************/ package com.ikanow.aleph2.example.external_harvester.services; import java.io.IOException; import java.util.Date; import java.util.Optional; import java.util.concurrent.ExecutionException; import java.util.stream.Collectors; import java.util.stream.StreamSupport; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import scala.Tuple2; import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.ObjectMapper; import com.ikanow.aleph2.data_model.interfaces.data_import.IHarvestContext; import com.ikanow.aleph2.data_model.interfaces.shared_services.ICrudService; import com.ikanow.aleph2.data_model.interfaces.shared_services.IManagementCrudService; import com.ikanow.aleph2.data_model.objects.data_import.DataBucketBean; import com.ikanow.aleph2.data_model.objects.data_import.DataBucketBean.MasterEnrichmentType; import com.ikanow.aleph2.data_model.objects.shared.SharedLibraryBean; import com.ikanow.aleph2.data_model.utils.BeanTemplateUtils; import com.ikanow.aleph2.data_model.utils.ContextUtils; import com.ikanow.aleph2.data_model.utils.CrudUtils; import 
com.ikanow.aleph2.example.external_harvester.data_model.GlobalConfigBean;
import com.ikanow.aleph2.example.external_harvester.data_model.ProcessInfoBean;

import fj.data.Either;

/**
 * Tutorial/example entry point for a harvester process launched OUTSIDE the Aleph2 JVM.
 * Demonstrates how such a process re-acquires its harvest context from a CLI-passed
 * signature, reads bucket / shared-library state (read-only), queries per-library
 * state (PIDs), and periodically emits test objects into the streaming pipeline.
 */
public class ExternalProcessLaunchService {

	final static Logger _logger = LogManager.getLogger();

	/**
	 * Runs the example: logs context/bucket info, dumps recorded PIDs for this bucket,
	 * then loops for ~10 minutes sending a test object to Kafka every 10s (streaming
	 * buckets only).
	 *
	 * @param args args[0] = serialized harvest context signature (consumed by
	 *             {@link ContextUtils#getHarvestContext}); args[1] = arbitrary extra
	 *             CLI argument, logged for demonstration only
	 */
	public static void main(String[] args) throws InstantiationException, IllegalAccessException, ClassNotFoundException, JsonProcessingException, IOException, InterruptedException, ExecutionException {

		final ObjectMapper mapper = BeanTemplateUtils.configureMapper(Optional.empty());

		// Get the context (unused here) - re-built from the signature passed on the command line
		final IHarvestContext context = ContextUtils.getHarvestContext(args[0]);
		final DataBucketBean bucket = context.getBucket().get();
		_logger.info("Launched context, eg bucket status = : " + BeanTemplateUtils.toJson(context.getBucketStatus(Optional.empty()).get()));
		_logger.info("Retrieved bucket from CON: " + BeanTemplateUtils.toJson(bucket));
		// Get the bucket (unused here)
		_logger.info("Retrieved arg from CLI: " + args[1]);

		// Check that joins the cluster if I request the data bucket store
		//context.getService(IManagementDbService.class, Optional.of("core_management_db")).get().getDataBucketStore();
		//(But not if it's in read only mode)
		final IManagementCrudService<DataBucketBean> bucket_service = context.getServiceContext()
				.getCoreManagementDbService().readOnlyVersion().getDataBucketStore();
		_logger.info("Getting Management DB and reading number of buckets = " + bucket_service.countObjects().get().intValue());

		// Demonstration of accessing (read only) library state information:
		final Tuple2<SharedLibraryBean, Optional<GlobalConfigBean>> lib_config = ExternalProcessHarvestTechnology
				.getConfig(context);
		_logger.info("Retrieved library configuration: " + lib_config._2().map(g -> BeanTemplateUtils.toJson(g).toString()).orElse("(no config)"));

		// 1) Preferred method of getting per library state:
		final ICrudService<ProcessInfoBean> pid_crud = context
				.getGlobalHarvestTechnologyObjectStore(ProcessInfoBean.class, ProcessInfoBean.PID_COLLECTION_NAME);
		// 2) Lower level way:
		//final IManagementDbService core_db = context.getServiceContext().getCoreManagementDbService();
		//final ICrudService<ProcessInfoBean> pid_crud = core_db.getPerLibraryState(ProcessInfoBean.class, lib_config._1(), ProcessInfoBean.PID_COLLECTION_NAME);
		// 3) Alternatively (this construct is how you get per bucket state also):
		//final ICrudService<ProcessInfoBean> pid_crud = context.getBucketObjectStore(ProcessInfoBean.class, Optional.empty(), ProcessInfoBean.PID_COLLECTION_NAME, Optional.of(AssetStateDirectoryBean.StateDirectoryType.library));

		// Asynchronously log the PIDs/hostnames recorded against this bucket, if the
		// library config says PIDs are stored in the DB
		lib_config._2().ifPresent(gc -> {
			if (gc.store_pids_in_db())
				pid_crud.getObjectsBySpec(CrudUtils.allOf(ProcessInfoBean.class).when(ProcessInfoBean::bucket_name, bucket.full_name())).thenAccept(cursor -> {
					String pids = StreamSupport.stream(cursor.spliterator(), false).map(c -> c._id())
							.collect(Collectors.joining(","));
					_logger.info("Pids/hostnames for this bucket: " + pids);
				}).exceptionally(err -> {
					_logger.error("Failed to get bucket pids", err);
					return null;
				});
		});

		// Just run for 10 minutes as an experiment (60 iterations x 10s sleep)
		for (int i = 0; i < 60; ++i) {
			// Example of promoting data to next stage
			if ((MasterEnrichmentType.streaming == bucket.master_enrichment_type())
					|| (MasterEnrichmentType.streaming_and_batch == bucket.master_enrichment_type())) {
				// Send an object to kafka
				// (System.currentTimeMillis() is the same epoch-millis value as the old
				//  new Date().getTime(), without the legacy java.util.Date allocation)
				final JsonNode json = mapper.createObjectNode().put("@timestamp", System.currentTimeMillis())
						.put("test_str", "test" + i).put("test_int", i);

				_logger.info("Sending object to kafka: " + json);

				context.sendObjectToStreamingPipeline(Optional.empty(), Either.left(json));
			}
			_logger.info("(sleeping: " + i + ")");
			try {
				Thread.sleep(10L * 1000L);
			}
			catch (InterruptedException e) {
				// Was previously swallowed (catch (Exception e) {}): restore the interrupt
				// flag so callers/executors can observe it, and stop the demo loop early
				Thread.currentThread().interrupt();
				break;
			}
		}
	}
}