com.datatorrent.stram.plan.physical.StreamMapping.java Source code

Java tutorial

Introduction

Here is the source code for com.datatorrent.stram.plan.physical.StreamMapping.java

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package com.datatorrent.stram.plan.physical;

import java.util.Collection;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
import com.google.common.math.IntMath;

import com.datatorrent.api.Context;
import com.datatorrent.api.Context.PortContext;
import com.datatorrent.api.Operator;
import com.datatorrent.api.Partitioner.PartitionKeys;
import com.datatorrent.api.StreamCodec;

import com.datatorrent.common.util.Pair;
import com.datatorrent.stram.StreamingContainerAgent;
import com.datatorrent.stram.plan.logical.LogicalPlan;
import com.datatorrent.stram.plan.logical.LogicalPlan.InputPortMeta;
import com.datatorrent.stram.plan.logical.LogicalPlan.OperatorMeta;
import com.datatorrent.stram.plan.logical.LogicalPlan.StreamMeta;
import com.datatorrent.stram.plan.logical.Operators;
import com.datatorrent.stram.plan.logical.Operators.PortMappingDescriptor;
import com.datatorrent.stram.plan.physical.PTOperator.PTInput;
import com.datatorrent.stram.plan.physical.PTOperator.PTOutput;

/**
 * Encapsulates the mapping of input to output operators, including unifiers. Depending on logical plan setting and
 * number of partitions, unifiers are created as needed and potentially cascaded.
 * <p>
 * One instance tracks a single logical stream ({@link StreamMeta}) within a {@link PhysicalPlan}. Calling
 * {@link #setSources(Collection)} records the upstream physical outputs of that stream and rebuilds the complete
 * unifier tree between those outputs and the downstream operators known to the plan.
 *
 * @since 0.9.0
 */
public class StreamMapping implements java.io.Serializable {
    private static final long serialVersionUID = 8572852828117485193L;

    private static final Logger LOG = LoggerFactory.getLogger(StreamMapping.class);

    /** Logical stream whose physical wiring is maintained by this mapping. */
    private final StreamMeta streamMeta;
    /** Physical plan that owns the operators created, undeployed and removed by this mapping. */
    private final PhysicalPlan plan;
    /** Unifier shared by the downstream operators; {@code null} while no such unifier is in use. */
    PTOperator finalUnifier;
    /** Intermediate unifiers created (or reused) by {@link #setupCascadingUnifiers}. */
    final Set<PTOperator> cascadingUnifiers = Sets.newHashSet();
    /** Unifiers inserted directly behind each upstream output by {@link #addSlidingUnifiers()}. */
    final Set<PTOperator> slidingUnifiers = Sets.newHashSet();
    /** Outputs feeding the unifier tree; replaced by the sliding unifier outputs when those are in use. */
    private final List<PTOutput> upstream = Lists.newArrayList();

    /**
     * Creates an empty mapping; no operators are linked until {@link #setSources(Collection)} is called.
     *
     * @param streamMeta logical stream to map
     * @param plan physical plan the stream belongs to
     */
    public StreamMapping(StreamMeta streamMeta, PhysicalPlan plan) {
        this.streamMeta = streamMeta;
        this.plan = plan;
    }

    /**
     * Adds every unifier currently held by this mapping (final, cascading and sliding) to the given collection.
     *
     * @param opers collection receiving the unifiers
     */
    void addTo(Collection<PTOperator> opers) {
        if (finalUnifier != null) {
            opers.add(finalUnifier);
        }
        opers.addAll(cascadingUnifiers);
        opers.addAll(slidingUnifiers);
    }

    /**
     * Replaces the upstream outputs of this stream with those found on the given partitions and rebuilds the
     * mapping to the downstream operators.
     *
     * @param partitions physical operators whose outputs are scanned for this logical stream
     */
    public void setSources(Collection<PTOperator> partitions) {
        upstream.clear();
        // add existing inputs
        for (PTOperator uoper : partitions) {
            for (PTOutput source : uoper.outputs) {
                // identity comparison: only outputs bound to this very stream instance are collected
                if (source.logicalStream == streamMeta) {
                    upstream.add(source);
                }
            }
        }
        redoMapping();
    }

    /**
     * Creates the physical operator for a sliding unifier of the given stream. Both window counts are divided by
     * their greatest common divisor before being handed to the source port's sliding unifier factory.
     *
     * @param streamMeta logical stream the unifier belongs to
     * @param plan physical plan in which the operator is created and registered as new
     * @param operatorApplicationWindowCount application window count of the source operator
     * @param slidingWindowCount slide-by window count of the source operator
     * @return the new unifier, carrying a single output bound to {@code streamMeta}
     * @throws AssertionError if the unifier does not expose exactly one output port
     */
    public static PTOperator createSlidingUnifier(StreamMeta streamMeta, PhysicalPlan plan,
            int operatorApplicationWindowCount, int slidingWindowCount) {
        int gcd = IntMath.gcd(operatorApplicationWindowCount, slidingWindowCount);
        OperatorMeta um = streamMeta.getSource().getSlidingUnifier(operatorApplicationWindowCount / gcd, gcd,
                slidingWindowCount / gcd);
        return newUnifier(um, streamMeta, plan);
    }

    /**
     * Creates the physical operator for the regular unifier of the given stream.
     *
     * @param streamMeta logical stream the unifier belongs to
     * @param plan physical plan in which the operator is created and registered as new
     * @return the new unifier, carrying a single output bound to {@code streamMeta}
     * @throws AssertionError if the unifier does not expose exactly one output port
     */
    public static PTOperator createUnifier(StreamMeta streamMeta, PhysicalPlan plan) {
        return newUnifier(streamMeta.getSource().getUnifierMeta(), streamMeta, plan);
    }

    /**
     * Shared construction logic of {@link #createUnifier} and {@link #createSlidingUnifier}: instantiates the
     * physical operator for {@code um}, verifies that it has a single output port, attaches that output to the
     * stream and records the operator in {@code plan.newOpers}.
     */
    private static PTOperator newUnifier(OperatorMeta um, StreamMeta streamMeta, PhysicalPlan plan) {
        PTOperator pu = plan.newOperator(um, um.getName());

        Operator unifier = um.getOperator();
        PortMappingDescriptor mergeDesc = new PortMappingDescriptor();
        Operators.describe(unifier, mergeDesc);
        if (mergeDesc.outputPorts.size() != 1) {
            throw new AssertionError(
                    "Unifier must have a single output port, instead found : " + mergeDesc.outputPorts);
        }

        pu.unifiedOperatorMeta = streamMeta.getSource().getOperatorMeta();
        pu.outputs.add(new PTOutput(mergeDesc.outputPorts.keySet().iterator().next(), streamMeta, pu));
        plan.newOpers.put(pu, unifier);
        return pu;
    }

    /**
     * Inserts one sliding unifier behind every upstream output when the source operator has
     * {@code SLIDE_BY_WINDOW_COUNT} set to a value smaller than its {@code APPLICATION_WINDOW_COUNT}. The outputs
     * of the sliding unifiers then replace the original entries of {@link #upstream}. When the attribute is set but
     * not smaller than the application window count, only a warning is logged.
     */
    private void addSlidingUnifiers() {
        OperatorMeta sourceOM = streamMeta.getSource().getOperatorMeta();
        if (!sourceOM.getAttributes().contains(Context.OperatorContext.SLIDE_BY_WINDOW_COUNT)) {
            return;
        }

        int slideByWindowCount = sourceOM.getValue(Context.OperatorContext.SLIDE_BY_WINDOW_COUNT);
        int applicationWindowCount = sourceOM.getValue(Context.OperatorContext.APPLICATION_WINDOW_COUNT);
        if (slideByWindowCount >= applicationWindowCount) {
            LOG.warn("Sliding Window Count {} should be less than APPLICATION WINDOW COUNT {}",
                    slideByWindowCount, applicationWindowCount);
            return;
        }

        // previously created sliding unifiers are scheduled for undeploy and replaced by fresh ones
        plan.undeployOpers.addAll(slidingUnifiers);
        slidingUnifiers.clear();

        List<PTOutput> newUpstream = Lists.newArrayList();
        for (PTOutput source : upstream) {
            PTOperator slidingUnifier = StreamMapping.createSlidingUnifier(streamMeta, plan,
                    applicationWindowCount, slideByWindowCount);
            addInput(slidingUnifier, source, null);
            this.slidingUnifiers.add(slidingUnifier);
            newUpstream.add(slidingUnifier.outputs.get(0));
        }
        upstream.clear();
        upstream.addAll(newUpstream);
    }

    /**
     * Groups the given outputs into consecutive chunks of exactly {@code limit} elements and places one unifier in
     * front of each full chunk. A trailing chunk with fewer than {@code limit} elements is passed through without a
     * unifier. The procedure repeats on the resulting level until at most {@code limit} outputs remain.
     *
     * @param upstream outputs to be merged at this level
     * @param pooledUnifiers previously used unifiers that are reused (and removed from this list) before new ones
     *        are created
     * @param limit maximum number of inputs per unifier; must be greater than 1, otherwise the number of outputs
     *        would never shrink
     * @return the outputs of the last level, at most {@code limit} entries
     */
    private List<PTOutput> setupCascadingUnifiers(List<PTOutput> upstream, List<PTOperator> pooledUnifiers,
            int limit) {
        if (limit < 2) {
            throw new IllegalArgumentException("Unifier limit must be greater than 1, found: " + limit);
        }

        List<PTOutput> nextLevel = Lists.newArrayList();
        int size = upstream.size();
        for (int start = 0; start < size; start += limit) {
            int end = start + limit;
            if (end > size) {
                // incomplete trailing group: hand the outputs to the next level unchanged
                nextLevel.addAll(upstream.subList(start, size));
                break;
            }

            PTOperator pu;
            if (!pooledUnifiers.isEmpty()) {
                pu = pooledUnifiers.remove(0);
            } else {
                pu = createUnifier(streamMeta, plan);
            }
            assert (pu.outputs.size() == 1) : "unifier has single output";
            nextLevel.addAll(pu.outputs);
            this.cascadingUnifiers.add(pu);

            for (PTOutput source : upstream.subList(start, end)) {
                addInput(pu, source, null);
            }
        }

        if (nextLevel.size() > limit) {
            return setupCascadingUnifiers(nextLevel, pooledUnifiers, limit);
        } else {
            return nextLevel;
        }
    }

    /**
     * rebuild the tree, which may cause more changes to execution layer than need be
     * TODO: investigate incremental logic
     */
    private void redoMapping() {

        Set<Pair<PTOperator, InputPortMeta>> downstreamOpers = collectDownstreamOpers();
        if (downstreamOpers.isEmpty()) {
            return;
        }

        // unifiers are required: unlink the existing ones before the tree is rebuilt
        for (PTOperator unifier : this.cascadingUnifiers) {
            detachUnifier(unifier);
        }
        if (this.finalUnifier != null) {
            detachUnifier(finalUnifier);
        }

        // the detached cascading unifiers form a pool that setupCascadingUnifiers draws from
        List<PTOperator> currentUnifiers = Lists.newArrayList(this.cascadingUnifiers);
        this.cascadingUnifiers.clear();
        plan.undeployOpers.addAll(currentUnifiers);
        addSlidingUnifiers();

        int limit = streamMeta.getSource().getValue(PortContext.UNIFIER_LIMIT);
        boolean separateUnifiers = requiresSeparateUnifiers();

        List<PTOutput> unifierSources = this.upstream;
        Map<StreamCodec<?>, List<PTOutput>> cascadeUnifierSourcesMap = Maps.newHashMap();

        if (limit > 1 && this.upstream.size() > limit) {
            // cascading unifier
            if (!separateUnifiers) {
                unifierSources = setupCascadingUnifiers(this.upstream, currentUnifiers, limit);
            } else {
                // one cascade per distinct stream codec among the sinks
                for (InputPortMeta ipm : streamMeta.getSinks()) {
                    StreamCodec<?> streamCodecInfo = StreamingContainerAgent.getStreamCodec(ipm);
                    if (!cascadeUnifierSourcesMap.containsKey(streamCodecInfo)) {
                        unifierSources = setupCascadingUnifiers(this.upstream, currentUnifiers, limit);
                        cascadeUnifierSourcesMap.put(streamCodecInfo, unifierSources);
                    }
                }
            }
        }

        // remove remaining unifiers
        for (PTOperator oper : currentUnifiers) {
            plan.removePTOperator(oper);
        }

        // Directly getting attribute from map to know if it is set or not as it can be overriden by the input
        Boolean sourceSingleFinal = streamMeta.getSource().getAttributes()
                .get(PortContext.UNIFIER_SINGLE_FINAL);

        // link the downstream operators with the unifiers
        for (Pair<PTOperator, InputPortMeta> doperEntry : downstreamOpers) {
            PTOperator doper = doperEntry.first;
            InputPortMeta ipm = doperEntry.second;

            Map<LogicalPlan.InputPortMeta, PartitionKeys> partKeys = doper.partitionKeys;
            PartitionKeys pks = partKeys != null ? partKeys.get(ipm) : null;

            // the sink port setting wins over the source port setting, which wins over the attribute default
            Boolean sinkSingleFinal = ipm.getAttributes().get(PortContext.UNIFIER_SINGLE_FINAL);
            boolean lastSingle;
            if (sinkSingleFinal != null) {
                lastSingle = sinkSingleFinal;
            } else if (sourceSingleFinal != null) {
                lastSingle = sourceSingleFinal;
            } else {
                lastSingle = PortContext.UNIFIER_SINGLE_FINAL.defaultValue;
            }

            if (upstream.size() > 1) {
                boolean unpartitioned = (pks == null) || (pks.mask == 0);
                if (!separateUnifiers && (unpartitioned || lastSingle)) {
                    linkThroughFinalUnifier(doper, ipm, unpartitioned ? null : pks, unifierSources);
                } else {
                    linkThroughDedicatedUnifier(doper, ipm, pks, separateUnifiers, unifierSources,
                            cascadeUnifierSourcesMap);
                }
            } else {
                // no partitioning
                linkDirectly(doper, ipm, pks);
            }
        }

        // Remove the unattached final unifier
        // Unattached final unifier is from
        // 1) Upstream operator partitions are scaled down to one. (no unifier needed)
        // 2) Downstream operators partitions are scaled up from one to multiple. (replaced by merged unifier)
        if (finalUnifier != null && finalUnifier.inputs.isEmpty()) {
            plan.removePTOperator(finalUnifier);
            finalUnifier = null;
        }

    }

    /**
     * Figures out the downstream consumers: every physical partition of every sink operator that is already mapped
     * in the plan, paired with the sink port. Sinks with {@code PARTITION_PARALLEL} set are skipped.
     */
    private Set<Pair<PTOperator, InputPortMeta>> collectDownstreamOpers() {
        Set<Pair<PTOperator, InputPortMeta>> downstreamOpers = Sets.newHashSet();
        for (InputPortMeta ipm : streamMeta.getSinks()) {
            // gets called prior to all logical operators mapped
            // skipped for parallel partitions - those are handled elsewhere
            if (!ipm.getValue(PortContext.PARTITION_PARALLEL) && plan.hasMapping(ipm.getOperatorWrapper())) {
                List<PTOperator> partitions = plan.getOperators(ipm.getOperatorWrapper());
                for (PTOperator doper : partitions) {
                    downstreamOpers.add(new Pair<PTOperator, InputPortMeta>(doper, ipm));
                }
            }
        }
        return downstreamOpers;
    }

    /**
     * Tells whether the sinks of the stream use stream codecs with differing identifiers, in which case a shared
     * unifier cannot serve all of them. Stops at the first sink whose identifier differs from the first one seen.
     */
    private boolean requiresSeparateUnifiers() {
        Integer lastId = null;
        for (InputPortMeta ipm : streamMeta.getSinks()) {
            StreamCodec<?> streamCodecInfo = StreamingContainerAgent.getStreamCodec(ipm);
            Integer id = plan.getStreamCodecIdentifier(streamCodecInfo);
            if (lastId == null) {
                lastId = id;
            } else if (!id.equals(lastId)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Connects the downstream operator to the shared final unifier, creating that unifier on first use and wiring
     * its inputs from {@code unifierSources} if it has none yet.
     */
    private void linkThroughFinalUnifier(PTOperator doper, InputPortMeta ipm, PartitionKeys pks,
            List<PTOutput> unifierSources) {
        if (finalUnifier == null) {
            finalUnifier = createUnifier(streamMeta, plan);
        }
        setInput(doper, ipm, finalUnifier, pks);
        if (finalUnifier.inputs.isEmpty()) {
            // set unifier inputs once, regardless how many downstream operators there are
            for (PTOutput out : unifierSources) {
                addInput(this.finalUnifier, out, null);
            }
        }
    }

    /**
     * MxN partitioning: connects the downstream partition through its own unifier, which is created and registered
     * in {@code upstreamMerge} on first use. The unifier inputs are always rebuilt, applying the partition keys of
     * the downstream partition to each of them.
     */
    private void linkThroughDedicatedUnifier(PTOperator doper, InputPortMeta ipm, PartitionKeys pks,
            boolean separateUnifiers, List<PTOutput> unifierSources,
            Map<StreamCodec<?>, List<PTOutput>> cascadeUnifierSourcesMap) {
        LOG.debug("MxN unifier for {} {} {}",
                new Object[] { doper, ipm.getPortName(), pks });
        PTOperator unifier = doper.upstreamMerge.get(ipm);
        if (unifier == null) {
            unifier = createUnifier(streamMeta, plan);
            doper.upstreamMerge.put(ipm, unifier);
            setInput(doper, ipm, unifier, null);
        }
        // sources may change dynamically, rebuild inputs (as for cascading unifiers)
        for (PTInput in : unifier.inputs) {
            in.source.sinks.remove(in);
        }
        unifier.inputs.clear();

        List<PTOutput> doperUnifierSources = unifierSources;
        if (separateUnifiers) {
            // prefer the cascade that was built for the stream codec of this sink, if there is one
            StreamCodec<?> streamCodecInfo = StreamingContainerAgent.getStreamCodec(ipm);
            List<PTOutput> cascadeSources = cascadeUnifierSourcesMap.get(streamCodecInfo);
            if (cascadeSources != null) {
                doperUnifierSources = cascadeSources;
            }
        }
        // add new inputs
        for (PTOutput out : doperUnifierSources) {
            addInput(unifier, out, pks);
        }
    }

    /**
     * Connects the downstream operator straight to the operator behind the first upstream output, removing a
     * dedicated unifier that may be left over from an earlier mapping. Requires {@link #upstream} to hold at least
     * one output.
     */
    private void linkDirectly(PTOperator doper, InputPortMeta ipm, PartitionKeys pks) {
        PTOperator unifier = doper.upstreamMerge.remove(ipm);
        if (unifier != null) {
            plan.removePTOperator(unifier);
        }
        setInput(doper, ipm, upstream.get(0).source, pks);
    }

    /**
     * Links the given input port of {@code oper} to the outputs of {@code sourceOper} that belong to this stream.
     * Nothing happens when {@code oper} already has an input on this stream and port that originates from
     * {@code sourceOper}.
     *
     * @param oper downstream operator receiving the input
     * @param ipm logical input port on the downstream operator
     * @param sourceOper upstream operator (or unifier) to read from
     * @param pks partition keys recorded on the new input, may be {@code null}
     */
    private void setInput(PTOperator oper, InputPortMeta ipm, PTOperator sourceOper, PartitionKeys pks) {
        // TODO: see if this can be handled more efficiently
        for (PTInput in : oper.inputs) {
            if (in.source.source == sourceOper && in.logicalStream == streamMeta
                    && ipm.getPortName().equals(in.portName)) {
                return;
            }
        }
        // link to upstream output(s) for this stream
        for (PTOutput upstreamOut : sourceOper.outputs) {
            if (upstreamOut.logicalStream == streamMeta) {
                PTInput input = new PTInput(ipm.getPortName(), streamMeta, oper, pks, upstreamOut,
                        ipm.getValue(LogicalPlan.IS_CONNECTED_TO_DELAY_OPERATOR));
                oper.inputs.add(input);
            }
        }
    }

    /**
     * Adds a merge input to {@code target} that reads from {@code upstreamOut}. The input is named
     * {@code <merge#PORT>}, where PORT is the source port name of the output's logical stream.
     *
     * @param target operator (typically a unifier) receiving the input
     * @param upstreamOut output to read from
     * @param pks partition keys recorded on the new input, may be {@code null}
     */
    public static void addInput(PTOperator target, PTOutput upstreamOut, PartitionKeys pks) {
        StreamMeta lStreamMeta = upstreamOut.logicalStream;
        PTInput input = new PTInput("<merge#" + lStreamMeta.getSource().getPortName() + ">", lStreamMeta, target,
                pks, upstreamOut, false);
        target.inputs.add(input);
    }

    /**
     * Unlinks the unifier in both directions: the inputs that consume its outputs are removed from their target
     * operators, and its own inputs are removed from the sink lists of the outputs they read from.
     *
     * @param unifier unifier to isolate
     */
    private void detachUnifier(PTOperator unifier) {
        // remove existing unifiers from downstream inputs
        for (PTOutput out : unifier.outputs) {
            for (PTInput input : out.sinks) {
                input.target.inputs.remove(input);
            }
            out.sinks.clear();
        }
        // remove from upstream outputs
        for (PTInput in : unifier.inputs) {
            in.source.sinks.remove(in);
        }
        unifier.inputs.clear();
    }

}