Java tutorial
/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.storm.task; import java.util.concurrent.atomic.AtomicBoolean; import org.apache.storm.generated.GlobalStreamId; import org.apache.storm.generated.Grouping; import org.apache.storm.generated.StormTopology; import org.apache.storm.hooks.ITaskHook; import org.apache.storm.metric.api.IMetric; import org.apache.storm.metric.api.IReducer; import org.apache.storm.metric.api.ICombiner; import org.apache.storm.metric.api.ReducedMetric; import org.apache.storm.metric.api.CombinedMetric; import org.apache.storm.state.ISubscribedState; import org.apache.storm.tuple.Fields; import org.apache.storm.utils.Utils; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Set; import org.apache.commons.lang.NotImplementedException; import org.json.simple.JSONValue; /** * A `TopologyContext` is given to bolts and spouts in their `prepare()` and `open()` * methods, respectively. This object provides information about the component's * place within the topology, such as task ids, inputs and outputs, etc. * * The `TopologyContext` is also used to declare `ISubscribedState` objects to * synchronize state with StateSpouts this object is subscribed to. */ public class TopologyContext extends WorkerTopologyContext implements IMetricsContext { private Integer _taskId; private Map<String, Object> _taskData = new HashMap<>(); private List<ITaskHook> _hooks = new ArrayList<>(); private Map<String, Object> _executorData; private Map<Integer, Map<Integer, Map<String, IMetric>>> _registeredMetrics; private AtomicBoolean _openOrPrepareWasCalled; // This is updated by the Worker and the topology has shared access to it private Map<String, Long> blobToLastKnownVersion; public TopologyContext(StormTopology topology, Map<String, Object> topoConf, Map<Integer, String> taskToComponent, Map<String, List<Integer>> componentToSortedTasks, Map<String, Map<String, Fields>> componentToStreamToFields, Map<String, Long> blobToLastKnownVersionShared, String stormId, String codeDir, String pidDir, Integer taskId, Integer workerPort, List<Integer> workerTasks, Map<String, Object> defaultResources, Map<String, Object> userResources, Map<String, Object> executorData, Map<Integer, Map<Integer, Map<String, IMetric>>> registeredMetrics, AtomicBoolean openOrPrepareWasCalled) { super(topology, topoConf, taskToComponent, componentToSortedTasks, componentToStreamToFields, stormId, codeDir, pidDir, workerPort, workerTasks, defaultResources, userResources); _taskId = taskId; _executorData = executorData; _registeredMetrics = registeredMetrics; _openOrPrepareWasCalled = openOrPrepareWasCalled; blobToLastKnownVersion = blobToLastKnownVersionShared; } /** * All state from all subscribed state spouts streams will be synced with * the provided object. * * It is recommended that your ISubscribedState object is kept as an instance * variable of this object. The recommended usage of this method is as follows: * * ```java * _myState = context.setAllSubscribedState(new MyState()); * ``` * * @param obj Provided ISubscribedState implementation * @return Returns the ISubscribedState object provided */ public <T extends ISubscribedState> T setAllSubscribedState(T obj) { //check that only subscribed to one component/stream for statespout //setsubscribedstate appropriately throw new NotImplementedException(); } /** * Synchronizes the default stream from the specified state spout component * id with the provided ISubscribedState object. * * The recommended usage of this method is as follows: * * ```java * _myState = context.setSubscribedState(componentId, new MyState()); * ``` * * @param componentId the id of the StateSpout component to subscribe to * @param obj Provided ISubscribedState implementation * @return Returns the ISubscribedState object provided */ public <T extends ISubscribedState> T setSubscribedState(String componentId, T obj) { return setSubscribedState(componentId, Utils.DEFAULT_STREAM_ID, obj); } /** * Synchronizes the specified stream from the specified state spout component * id with the provided ISubscribedState object. * * The recommended usage of this method is as follows: * * ```java * _myState = context.setSubscribedState(componentId, streamId, new MyState()); * ``` * * @param componentId the id of the StateSpout component to subscribe to * @param streamId the stream to subscribe to * @param obj Provided ISubscribedState implementation * @return Returns the ISubscribedState object provided */ public <T extends ISubscribedState> T setSubscribedState(String componentId, String streamId, T obj) { throw new NotImplementedException(); } public Map<String, Long> getBlobToLastKnownVersion() { return blobToLastKnownVersion; } /** * Gets the task id of this task. * * @return the task id */ public int getThisTaskId() { return _taskId; } /** * @return the component id for this task. The component id maps * to a component id specified for a Spout or Bolt in the topology definition. */ public String getThisComponentId() { return getComponentId(_taskId); } /** * Gets the declared output fields for the specified stream id for the * component this task is a part of. */ public Fields getThisOutputFields(String streamId) { return getComponentOutputFields(getThisComponentId(), streamId); } /** * Gets the declared output fields for all streams for the * component this task is a part of. */ public Map<String, List<String>> getThisOutputFieldsForStreams() { Map<String, List<String>> streamToFields = new HashMap<>(); for (String stream : this.getThisStreams()) { streamToFields.put(stream, this.getThisOutputFields(stream).toList()); } return streamToFields; } /** * Gets the set of streams declared for the component of this task. */ public Set<String> getThisStreams() { return getComponentStreams(getThisComponentId()); } /** * Gets the index of this task id in getComponentTasks(getThisComponentId()). * An example use case for this method is determining which task * accesses which resource in a distributed resource to ensure an even distribution. */ public int getThisTaskIndex() { List<Integer> tasks = new ArrayList<>(getComponentTasks(getThisComponentId())); Collections.sort(tasks); for (int i = 0; i < tasks.size(); i++) { if (tasks.get(i) == getThisTaskId()) { return i; } } throw new RuntimeException("Fatal: could not find this task id in this component"); } /** * Gets the declared input fields for this component. * * @return A map from sources to streams to fields. */ public Map<String, Map<String, List<String>>> getThisInputFields() { Map<String, Map<String, List<String>>> outputMap = new HashMap<>(); for (Map.Entry<GlobalStreamId, Grouping> entry : this.getThisSources().entrySet()) { String componentId = entry.getKey().get_componentId(); Set<String> streams = getComponentStreams(componentId); for (String stream : streams) { Map<String, List<String>> streamFieldMap = outputMap.get(componentId); if (streamFieldMap == null) { streamFieldMap = new HashMap<>(); outputMap.put(componentId, streamFieldMap); } streamFieldMap.put(stream, getComponentOutputFields(componentId, stream).toList()); } } return outputMap; } /** * Gets the declared inputs to this component. * * @return A map from subscribed component/stream to the grouping subscribed with. */ public Map<GlobalStreamId, Grouping> getThisSources() { return getSources(getThisComponentId()); } /** * Gets information about who is consuming the outputs of this component, and how. * * @return Map from stream id to component id to the Grouping used. */ public Map<String, Map<String, Grouping>> getThisTargets() { return getTargets(getThisComponentId()); } public void setTaskData(String name, Object data) { _taskData.put(name, data); } public Object getTaskData(String name) { return _taskData.get(name); } public void setExecutorData(String name, Object data) { _executorData.put(name, data); } public Object getExecutorData(String name) { return _executorData.get(name); } public void addTaskHook(ITaskHook hook) { hook.prepare(_topoConf, this); _hooks.add(hook); } public Collection<ITaskHook> getHooks() { return _hooks; } private static Map<String, Object> groupingToJSONableMap(Grouping grouping) { Map<String, Object> groupingMap = new HashMap<>(); groupingMap.put("type", grouping.getSetField().toString()); if (grouping.is_set_fields()) { groupingMap.put("fields", grouping.get_fields()); } return groupingMap; } @Override public String toJSONString() { Map<String, Object> obj = new HashMap<>(); obj.put("task->component", this.getTaskToComponent()); obj.put("taskid", this.getThisTaskId()); obj.put("componentid", this.getThisComponentId()); List<String> streamList = new ArrayList<>(); streamList.addAll(this.getThisStreams()); obj.put("streams", streamList); obj.put("stream->outputfields", this.getThisOutputFieldsForStreams()); // Convert targets to a JSON serializable format Map<String, Map<String, Object>> stringTargets = new HashMap<>(); for (Map.Entry<String, Map<String, Grouping>> entry : this.getThisTargets().entrySet()) { Map<String, Object> stringTargetMap = new HashMap<>(); for (Map.Entry<String, Grouping> innerEntry : entry.getValue().entrySet()) { stringTargetMap.put(innerEntry.getKey(), groupingToJSONableMap(innerEntry.getValue())); } stringTargets.put(entry.getKey(), stringTargetMap); } obj.put("stream->target->grouping", stringTargets); // Convert sources to a JSON serializable format Map<String, Map<String, Object>> stringSources = new HashMap<>(); for (Map.Entry<GlobalStreamId, Grouping> entry : this.getThisSources().entrySet()) { GlobalStreamId gid = entry.getKey(); Map<String, Object> stringSourceMap = stringSources.get(gid.get_componentId()); if (stringSourceMap == null) { stringSourceMap = new HashMap<>(); stringSources.put(gid.get_componentId(), stringSourceMap); } stringSourceMap.put(gid.get_streamId(), groupingToJSONableMap(entry.getValue())); } obj.put("source->stream->grouping", stringSources); obj.put("source->stream->fields", this.getThisInputFields()); return JSONValue.toJSONString(obj); } /* * Register a IMetric instance. * * Storm will then call `getValueAndReset()` on the metric every `timeBucketSizeInSecs` * and the returned value is sent to all metrics consumers. * * You must call this during `IBolt.prepare()` or `ISpout.open()`. * @return The IMetric argument unchanged. */ public <T extends IMetric> T registerMetric(String name, T metric, int timeBucketSizeInSecs) { if (_openOrPrepareWasCalled.get()) { throw new RuntimeException("TopologyContext.registerMetric can only be called from within overridden " + "IBolt::prepare() or ISpout::open() method."); } if (metric == null) { throw new IllegalArgumentException("Cannot register a null metric"); } if (timeBucketSizeInSecs <= 0) { throw new IllegalArgumentException( "TopologyContext.registerMetric can only be called with timeBucketSizeInSecs " + "greater than or equal to 1 second."); } if (getRegisteredMetricByName(name) != null) { throw new RuntimeException("The same metric name `" + name + "` was registered twice."); } Map<Integer, Map<Integer, Map<String, IMetric>>> m1 = _registeredMetrics; if (!m1.containsKey(timeBucketSizeInSecs)) { m1.put(timeBucketSizeInSecs, new HashMap<Integer, Map<String, IMetric>>()); } Map<Integer, Map<String, IMetric>> m2 = m1.get(timeBucketSizeInSecs); if (!m2.containsKey(_taskId)) { m2.put(_taskId, new HashMap<String, IMetric>()); } Map<String, IMetric> m3 = m2.get(_taskId); if (m3.containsKey(name)) { throw new RuntimeException("The same metric name `" + name + "` was registered twice."); } else { m3.put(name, metric); } return metric; } /** * Get component's metric from registered metrics by name. * Notice: Normally, one component can only register one metric name once. * But now registerMetric has a bug(https://issues.apache.org/jira/browse/STORM-254) * cause the same metric name can register twice. * So we just return the first metric we meet. */ public IMetric getRegisteredMetricByName(String name) { IMetric metric = null; for (Map<Integer, Map<String, IMetric>> taskIdToNameToMetric : _registeredMetrics.values()) { Map<String, IMetric> nameToMetric = taskIdToNameToMetric.get(_taskId); if (nameToMetric != null) { metric = nameToMetric.get(name); if (metric != null) { //we just return the first metric we meet break; } } } return metric; } /* * Convenience method for registering ReducedMetric. */ public ReducedMetric registerMetric(String name, IReducer reducer, int timeBucketSizeInSecs) { return registerMetric(name, new ReducedMetric(reducer), timeBucketSizeInSecs); } /* * Convenience method for registering CombinedMetric. */ public CombinedMetric registerMetric(String name, ICombiner combiner, int timeBucketSizeInSecs) { return registerMetric(name, new CombinedMetric(combiner), timeBucketSizeInSecs); } }