ch.epfl.data.squall.components.EquiJoinComponent.java Source code

Java tutorial

Introduction

Here is the source code for ch.epfl.data.squall.components.EquiJoinComponent.java

Source

/*
 * Copyright (c) 2011-2015 EPFL DATA Laboratory
 * Copyright (c) 2014-2015 The Squall Collaboration (see NOTICE)
 *
 * All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package ch.epfl.data.squall.components;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import org.apache.commons.lang.ArrayUtils;
import org.apache.log4j.Logger;

import backtype.storm.Config;
import backtype.storm.topology.TopologyBuilder;
import ch.epfl.data.squall.expressions.ValueExpression;
import ch.epfl.data.squall.operators.ChainOperator;
import ch.epfl.data.squall.operators.Operator;
import ch.epfl.data.squall.operators.ProjectOperator;
import ch.epfl.data.squall.predicates.Predicate;
import ch.epfl.data.squall.storage.AggregationStorage;
import ch.epfl.data.squall.storage.BasicStore;
import ch.epfl.data.squall.storage.KeyValueStore;
import ch.epfl.data.squall.storm_components.InterchangingComponent;
import ch.epfl.data.squall.storm_components.StormComponent;
import ch.epfl.data.squall.storm_components.StormDstJoin;
import ch.epfl.data.squall.storm_components.StormDstTupleStorageBDB;
import ch.epfl.data.squall.storm_components.StormDstTupleStorageJoin;
import ch.epfl.data.squall.storm_components.StormEmitter;
import ch.epfl.data.squall.storm_components.synchronization.TopologyKiller;
import ch.epfl.data.squall.types.Type;
import ch.epfl.data.squall.utilities.MyUtilities;
import ch.epfl.data.squall.window_semantics.WindowSemanticsManager;

public class EquiJoinComponent extends JoinerComponent implements Component {
    private static final long serialVersionUID = 1L;
    private static Logger LOG = Logger.getLogger(EquiJoinComponent.class);

    private final Component _firstParent;
    private final Component _secondParent;
    private Component _child;

    private final String _componentName;

    private long _batchOutputMillis;

    private List<Integer> _hashIndexes;
    private List<ValueExpression> _hashExpressions;

    private StormEmitter _joiner;

    private final ChainOperator _chain = new ChainOperator();

    // The storage is actually KeyValue<String, String>
    // or AggregationStorage<Numeric> for pre-aggregation
    // Access method returns a list of Strings (a list of Numerics for
    // pre-aggregation)
    private BasicStore<String> _firstStorage, _secondStorage;
    // preAggregation
    private ProjectOperator _firstPreAggProj, _secondPreAggProj;

    private boolean _printOut;
    private boolean _printOutSet; // whether printOut was already set

    private List<String> _fullHashList;
    private Predicate _joinPredicate;

    private boolean _isRemoveIndex = true;

    public EquiJoinComponent(Component firstParent, Component secondParent) {
        _firstParent = firstParent;
        _firstParent.setChild(this);
        _secondParent = secondParent;
        _secondParent.setChild(this);

        _componentName = firstParent.getName() + "_" + secondParent.getName();
    }

    public EquiJoinComponent(Component firstParent, Component secondParent, boolean isRemoveIndex) {
        _firstParent = firstParent;
        _firstParent.setChild(this);
        _secondParent = secondParent;
        _secondParent.setChild(this);
        _componentName = firstParent.getName() + "_" + secondParent.getName();
        _isRemoveIndex = isRemoveIndex;
    }

    public EquiJoinComponent(Component firstParent, int firstJoinIndex, Component secondParent,
            int secondJoinIndex) {
        this(firstParent, secondParent);
        firstParent.setOutputPartKey(firstJoinIndex);
        secondParent.setOutputPartKey(secondJoinIndex);
    }

    @Override
    public EquiJoinComponent add(Operator operator) {
        _chain.addOperator(operator);
        return this;
    }

    @Override
    public boolean equals(Object obj) {
        if (obj instanceof Component)
            return _componentName.equals(((Component) obj).getName());
        else
            return false;
    }

    @Override
    public List<DataSourceComponent> getAncestorDataSources() {
        final List<DataSourceComponent> list = new ArrayList<DataSourceComponent>();
        for (final Component parent : getParents())
            list.addAll(parent.getAncestorDataSources());
        return list;
    }

    @Override
    public long getBatchOutputMillis() {
        return _batchOutputMillis;
    }

    @Override
    public ChainOperator getChainOperator() {
        return _chain;
    }

    @Override
    public Component getChild() {
        return _child;
    }

    // from StormEmitter interface
    @Override
    public String[] getEmitterIDs() {
        return _joiner.getEmitterIDs();
    }

    @Override
    public List<String> getFullHashList() {
        return _fullHashList;
    }

    @Override
    public List<ValueExpression> getHashExpressions() {
        return _hashExpressions;
    }

    @Override
    public List<Integer> getHashIndexes() {
        return _hashIndexes;
    }

    @Override
    public String getInfoID() {
        return _joiner.getInfoID();
    }

    @Override
    public String getName() {
        return _componentName;
    }

    @Override
    public Component[] getParents() {
        return new Component[] { _firstParent, _secondParent };
    }

    @Override
    public boolean getPrintOut() {
        return _printOut;
    }

    @Override
    public int hashCode() {
        int hash = 7;
        hash = 37 * hash + (_componentName != null ? _componentName.hashCode() : 0);
        return hash;
    }

    @Override
    public void makeBolts(TopologyBuilder builder, TopologyKiller killer, List<String> allCompNames, Config conf,
            int hierarchyPosition) {

        // by default print out for the last component
        // for other conditions, can be set via setPrintOut
        if (hierarchyPosition == StormComponent.FINAL_COMPONENT && !_printOutSet)
            setPrintOut(true);

        MyUtilities.checkBatchOutput(_batchOutputMillis, _chain.getAggregation(), conf);

        // If not set in Preaggregation, we set normal storages
        if (_firstStorage == null)
            _firstStorage = new KeyValueStore<String, String>(conf);
        if (_secondStorage == null)
            _secondStorage = new KeyValueStore<String, String>(conf);

        boolean isBDB = MyUtilities.isBDB(conf);
        if (isBDB && _joinPredicate == null) {
            throw new RuntimeException("Please provide _joinPredicate if you want to run BDB!");
        }

        // TODO: what is with the if condition
        if (isBDB && (hierarchyPosition == StormComponent.FINAL_COMPONENT)) {
            _joiner = new StormDstTupleStorageBDB(_firstParent, _secondParent, this, allCompNames, _joinPredicate,
                    hierarchyPosition, builder, killer, conf);
        } else if (_joinPredicate != null) {
            _joiner = new StormDstTupleStorageJoin(_firstParent, _secondParent, this, allCompNames, _joinPredicate,
                    hierarchyPosition, builder, killer, conf);
        } else {
            // should issue a warning
            _joiner = new StormDstJoin(_firstParent, _secondParent, this, allCompNames, _firstStorage,
                    _secondStorage, _firstPreAggProj, _secondPreAggProj, hierarchyPosition, builder, killer, conf,
                    _isRemoveIndex);
        }
    }

    @Override
    public EquiJoinComponent setBatchOutputMillis(long millis) {
        _batchOutputMillis = millis;
        return this;
    }

    @Override
    public void setChild(Component child) {
        _child = child;
    }

    @Override
    public Component setContentSensitiveThetaJoinWrapper(Type wrapper) {
        return this;
    }

    // Out of the first storage (join of S tuple with R relation)
    public EquiJoinComponent setFirstPreAggProj(ProjectOperator firstPreAggProj) {
        _firstPreAggProj = firstPreAggProj;
        return this;
    }

    // next four methods are for Preaggregation
    public EquiJoinComponent setFirstPreAggStorage(AggregationStorage firstPreAggStorage) {
        _firstStorage = firstPreAggStorage;
        return this;
    }

    // list of distinct keys, used for direct stream grouping and load-balancing
    // ()
    @Override
    public EquiJoinComponent setFullHashList(List<String> fullHashList) {
        _fullHashList = fullHashList;
        return this;
    }

    @Override
    public EquiJoinComponent setHashExpressions(List<ValueExpression> hashExpressions) {
        _hashExpressions = hashExpressions;
        return this;
    }

    @Override
    public Component setInterComp(InterchangingComponent inter) {
        throw new RuntimeException("EquiJoin component does not support setInterComp");
    }

    @Override
    public EquiJoinComponent setJoinPredicate(Predicate predicate) {
        _joinPredicate = predicate;
        return this;
    }

    @Override
    public EquiJoinComponent setOutputPartKey(int... hashIndexes) {
        return setOutputPartKey(Arrays.asList(ArrayUtils.toObject(hashIndexes)));
    }

    @Override
    public EquiJoinComponent setOutputPartKey(List<Integer> hashIndexes) {
        _hashIndexes = hashIndexes;
        return this;
    }

    @Override
    public EquiJoinComponent setPrintOut(boolean printOut) {
        _printOutSet = true;
        _printOut = printOut;
        return this;
    }

    // Out of the second storage (join of R tuple with S relation)
    public EquiJoinComponent setSecondPreAggProj(ProjectOperator secondPreAggProj) {
        _secondPreAggProj = secondPreAggProj;
        return this;
    }

    public EquiJoinComponent setSecondPreAggStorage(AggregationStorage secondPreAggStorage) {
        _secondStorage = secondPreAggStorage;
        return this;
    }

    @Override
    public Component setSlidingWindow(int windowRange) {
        WindowSemanticsManager._IS_WINDOW_SEMANTICS = true;
        _windowSize = windowRange * 1000; // Width in terms of millis, Default
        // is -1 which is full history
        return this;
    }

    @Override
    public Component setTumblingWindow(int windowRange) {
        WindowSemanticsManager._IS_WINDOW_SEMANTICS = true;
        _tumblingWindowSize = windowRange * 1000;// For tumbling semantics
        return this;
    }

}