ch.epfl.data.squall.components.theta.AdaptiveThetaJoinComponent.java Source code

Java tutorial

Introduction

Here is the source code for ch.epfl.data.squall.components.theta.AdaptiveThetaJoinComponent.java

Source

/*
 * Copyright (c) 2011-2015 EPFL DATA Laboratory
 * Copyright (c) 2014-2015 The Squall Collaboration (see NOTICE)
 *
 * All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 *
 * @author El Seidy
 * This Class is the Theta-join-Dynamic Wrapper which includes all the Theta components.
 * 1- ThetaReshuffler Bolt.
 * 2- ThetaJoiner Bolt.
 * 3- One instance of ThetaClock Spout.
 * 4- One instance of ThetaMappingAssignerSynchronizer Bolt.
 */
package ch.epfl.data.squall.components.theta;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import org.apache.commons.lang.ArrayUtils;
import org.apache.log4j.Logger;

import backtype.storm.Config;
import backtype.storm.topology.TopologyBuilder;
import ch.epfl.data.squall.components.Component;
import ch.epfl.data.squall.components.DataSourceComponent;
import ch.epfl.data.squall.components.JoinerComponent;
import ch.epfl.data.squall.expressions.ValueExpression;
import ch.epfl.data.squall.operators.ChainOperator;
import ch.epfl.data.squall.operators.Operator;
import ch.epfl.data.squall.predicates.Predicate;
import ch.epfl.data.squall.storm_components.InterchangingComponent;
import ch.epfl.data.squall.storm_components.StormComponent;
import ch.epfl.data.squall.storm_components.StormEmitter;
import ch.epfl.data.squall.storm_components.synchronization.TopologyKiller;
import ch.epfl.data.squall.thetajoin.adaptive.storm_component.ThetaJoinerAdaptiveAdvisedEpochs;
import ch.epfl.data.squall.thetajoin.adaptive.storm_component.ThetaReshufflerAdvisedEpochs;
import ch.epfl.data.squall.thetajoin.adaptive.storm_matrix_mapping.ThetaDataMigrationJoinerToReshufflerMapping;
import ch.epfl.data.squall.thetajoin.adaptive.storm_matrix_mapping.ThetaJoinAdaptiveMapping;
import ch.epfl.data.squall.thetajoin.matrix_assignment.ContentInsensitiveMatrixAssignment;
import ch.epfl.data.squall.types.Type;
import ch.epfl.data.squall.utilities.MyUtilities;
import ch.epfl.data.squall.utilities.SystemParameters;
import ch.epfl.data.squall.window_semantics.WindowSemanticsManager;

public class AdaptiveThetaJoinComponent extends JoinerComponent implements Component {
    private static final long serialVersionUID = 1L;
    private static Logger LOG = Logger.getLogger(AdaptiveThetaJoinComponent.class);
    private final Component _firstParent;
    private final Component _secondParent;
    private Component _child;
    private String _componentName;
    private long _batchOutputMillis;
    private List<Integer> _hashIndexes;
    private List<ValueExpression> _hashExpressions;
    private ThetaJoinerAdaptiveAdvisedEpochs _joiner;
    private ThetaReshufflerAdvisedEpochs _reshuffler;
    private final ChainOperator _chain = new ChainOperator();
    private boolean _printOut;
    private boolean _printOutSet; // whether printOut was already set
    private Predicate _joinPredicate;
    private int _joinerParallelism;
    private InterchangingComponent _interComp = null;

    public AdaptiveThetaJoinComponent(Component firstParent, Component secondParent) {
        _firstParent = firstParent;
        _firstParent.setChild(this);
        _secondParent = secondParent;
        if (_secondParent != null) {
            _secondParent.setChild(this);
            _componentName = firstParent.getName() + "_" + secondParent.getName();
        } else
            _componentName = new String(firstParent.getName().split("-")[0]) + "_"
                    + new String(firstParent.getName().split("-")[1]);
    }

    @Override
    public AdaptiveThetaJoinComponent add(Operator operator) {
        _chain.addOperator(operator);
        return this;
    }

    @Override
    public boolean equals(Object obj) {
        if (obj instanceof Component)
            return _componentName.equals(((Component) obj).getName());
        else
            return false;
    }

    @Override
    public List<DataSourceComponent> getAncestorDataSources() {
        final List<DataSourceComponent> list = new ArrayList<DataSourceComponent>();
        for (final Component parent : getParents())
            list.addAll(parent.getAncestorDataSources());
        return list;
    }

    @Override
    public long getBatchOutputMillis() {
        return _batchOutputMillis;
    }

    @Override
    public ChainOperator getChainOperator() {
        return _chain;
    }

    @Override
    public Component getChild() {
        return _child;
    }

    // from StormEmitter interface
    @Override
    public String[] getEmitterIDs() {
        return _joiner.getEmitterIDs();
    }

    @Override
    public List<String> getFullHashList() {
        throw new RuntimeException("Load balancing for Dynamic Theta join is done inherently!");
    }

    @Override
    public List<ValueExpression> getHashExpressions() {
        return _hashExpressions;
    }

    @Override
    public List<Integer> getHashIndexes() {
        return _hashIndexes;
    }

    @Override
    public String getInfoID() {
        return _joiner.getInfoID();
    }

    public InterchangingComponent getInterComp() {
        return _interComp;
    }

    public Predicate getJoinPredicate() {
        return _joinPredicate;
    }

    @Override
    public String getName() {
        return _componentName;
    }

    @Override
    public Component[] getParents() {
        return new Component[] { _firstParent, _secondParent };
    }

    @Override
    public boolean getPrintOut() {
        return _printOut;
    }

    @Override
    public int hashCode() {
        int hash = 7;
        hash = 37 * hash + (_componentName != null ? _componentName.hashCode() : 0);
        return hash;
    }

    @Override
    public void makeBolts(TopologyBuilder builder, TopologyKiller killer, List<String> allCompNames, Config conf,
            int hierarchyPosition) {

        // by default print out for the last component
        // for other conditions, can be set via setPrintOut
        if (hierarchyPosition == StormComponent.FINAL_COMPONENT && !_printOutSet)
            setPrintOut(true);
        _joinerParallelism = SystemParameters.getInt(conf, _componentName + "_PAR");
        MyUtilities.checkBatchOutput(_batchOutputMillis, _chain.getAggregation(), conf);

        int firstCardinality, secondCardinality;
        if (_secondParent == null) { // then first has to be of type
            // Interchanging Emitter
            firstCardinality = SystemParameters.getInt(conf,
                    new String(_firstParent.getName().split("-")[0] + "_CARD"));
            secondCardinality = SystemParameters.getInt(conf,
                    new String(_firstParent.getName().split("-")[1] + "_CARD"));
        } else {
            firstCardinality = SystemParameters.getInt(conf, _firstParent.getName() + "_CARD");
            secondCardinality = SystemParameters.getInt(conf, _secondParent.getName() + "_CARD");
        }

        final ContentInsensitiveMatrixAssignment _currentMappingAssignment = new ContentInsensitiveMatrixAssignment(
                firstCardinality, secondCardinality, _joinerParallelism, -1);

        final String dim = _currentMappingAssignment.getMappingDimensions();
        // dim ="1-1"; //initiate Splitting
        LOG.info(_componentName + "Initial Dimensions is: " + dim);

        // create the bolts ..

        // Create the reshuffler.
        _reshuffler = new ThetaReshufflerAdvisedEpochs(_firstParent, _secondParent, allCompNames,
                _joinerParallelism, hierarchyPosition, conf, builder, dim);

        if (_interComp != null)
            _reshuffler.set_interComp(_interComp);

        // Create the Join Bolt.
        _joiner = new ThetaJoinerAdaptiveAdvisedEpochs(_firstParent, _secondParent, this, allCompNames,
                _joinPredicate, hierarchyPosition, builder, killer, conf, _reshuffler, dim);
        _reshuffler.setJoinerID(_joiner.getID());

        /*
         * setup communication between the components.
         */
        // 1) Hook up the mapper&Synchronizer(reshuffler) to the reshuffler
        _reshuffler.getCurrentBolt().directGrouping(_reshuffler.getID(), SystemParameters.ThetaSynchronizerSignal);

        // 2) Hook up the previous emitters to the reshuffler

        final ThetaJoinAdaptiveMapping dMap = new ThetaJoinAdaptiveMapping(conf, -1);
        final ArrayList<StormEmitter> emittersList = new ArrayList<StormEmitter>();
        if (_interComp == null) {
            emittersList.add(_firstParent);
            if (_secondParent != null)
                emittersList.add(_secondParent);
        } else
            emittersList.add(_interComp);
        for (final StormEmitter emitter : emittersList) {
            final String[] emitterIDs = emitter.getEmitterIDs();
            for (final String emitterID : emitterIDs)
                _reshuffler.getCurrentBolt().customGrouping(emitterID, dMap); // default
            // message
            // stream
        }
        // 3) Hook up the DataMigration from the joiners to the reshuffler
        _reshuffler.getCurrentBolt().customGrouping(_joiner.getID(),
                SystemParameters.ThetaDataMigrationJoinerToReshuffler,
                new ThetaDataMigrationJoinerToReshufflerMapping(conf, -1));
        // --for the LAST_ACK !!
        _joiner.getCurrentBolt().allGrouping(_reshuffler.getID());
        // 4) Hook up the signals from the reshuffler to the joiners
        _joiner.getCurrentBolt().allGrouping(_reshuffler.getID(), SystemParameters.ThetaReshufflerSignal);
        // 5) Hook up the DataMigration from the reshuffler to the joiners
        _joiner.getCurrentBolt().directGrouping(_reshuffler.getID(),
                SystemParameters.ThetaDataMigrationReshufflerToJoiner);
        // 6) Hook up the Data_Stream data (normal tuples) from the reshuffler
        // to the joiners
        _joiner.getCurrentBolt().directGrouping(_reshuffler.getID(), SystemParameters.ThetaDataReshufflerToJoiner);
        // 7) Hook up the Acks from the Joiner to the Mapper&Synchronizer
        _reshuffler.getCurrentBolt().directGrouping(_joiner.getID(), SystemParameters.ThetaJoinerAcks);// synchronizer is already one
        // anyway.
    }

    @Override
    public AdaptiveThetaJoinComponent setBatchOutputMillis(long millis) {
        _batchOutputMillis = millis;
        return this;
    }

    @Override
    public void setChild(Component child) {
        _child = child;
    }

    // TODO IMPLEMENT ME
    @Override
    public Component setContentSensitiveThetaJoinWrapper(Type wrapper) {
        return this;
    }

    // list of distinct keys, used for direct stream grouping and load-balancing
    // ()
    @Override
    public ThetaJoinComponent setFullHashList(List<String> fullHashList) {
        throw new RuntimeException("Load balancing for Dynamic Theta join is done inherently!");
    }

    @Override
    public AdaptiveThetaJoinComponent setHashExpressions(List<ValueExpression> hashExpressions) {
        _hashExpressions = hashExpressions;
        return this;
    }

    @Override
    public AdaptiveThetaJoinComponent setInterComp(InterchangingComponent _interComp) {
        this._interComp = _interComp;
        return this;
    }

    @Override
    public Component setJoinPredicate(Predicate joinPredicate) {
        _joinPredicate = joinPredicate;
        return this;
    }

    @Override
    public AdaptiveThetaJoinComponent setOutputPartKey(int... hashIndexes) {
        return setOutputPartKey(Arrays.asList(ArrayUtils.toObject(hashIndexes)));
    }

    @Override
    public AdaptiveThetaJoinComponent setOutputPartKey(List<Integer> hashIndexes) {
        _hashIndexes = hashIndexes;
        return this;
    }

    @Override
    public AdaptiveThetaJoinComponent setPrintOut(boolean printOut) {
        _printOutSet = true;
        _printOut = printOut;
        return this;
    }

    @Override
    public Component setSlidingWindow(int windowRange) {
        WindowSemanticsManager._IS_WINDOW_SEMANTICS = true;
        _windowSize = windowRange * 1000; // Width in terms of millis, Default
        // is -1 which is full history

        return this;
    }

    @Override
    public Component setTumblingWindow(int windowRange) {
        WindowSemanticsManager._IS_WINDOW_SEMANTICS = true;
        _tumblingWindowSize = windowRange * 1000;// For tumbling semantics
        return null;
    }

}