// Java tutorial
/*
 * Copyright (c) 2011-2015 EPFL DATA Laboratory
 * Copyright (c) 2014-2015 The Squall Collaboration (see NOTICE)
 *
 * All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package ch.epfl.data.squall.components.theta;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import org.apache.commons.lang.ArrayUtils;
import org.apache.log4j.Logger;

import backtype.storm.Config;
import backtype.storm.topology.TopologyBuilder;
import ch.epfl.data.squall.components.Component;
import ch.epfl.data.squall.components.DataSourceComponent;
import ch.epfl.data.squall.components.JoinerComponent;
import ch.epfl.data.squall.expressions.ValueExpression;
import ch.epfl.data.squall.operators.ChainOperator;
import ch.epfl.data.squall.operators.Operator;
import ch.epfl.data.squall.predicates.Predicate;
import ch.epfl.data.squall.storm_components.InterchangingComponent;
import ch.epfl.data.squall.storm_components.StormBoltComponent;
import ch.epfl.data.squall.storm_components.StormComponent;
import ch.epfl.data.squall.storm_components.synchronization.TopologyKiller;
import ch.epfl.data.squall.storm_components.theta.StormThetaJoin;
import ch.epfl.data.squall.storm_components.theta.StormThetaJoinBDB;
import ch.epfl.data.squall.types.Type;
import ch.epfl.data.squall.utilities.MyUtilities;
import ch.epfl.data.squall.window_semantics.WindowSemanticsManager;

/**
 * Query-plan component that joins the output of two parent components.
 *
 * <p>
 * The component is named {@code <firstParent name>_<secondParent name>} and
 * registers itself as the child of both parents on construction. The actual
 * bolt ({@link StormThetaJoin} or {@link StormThetaJoinBDB}) is only created
 * in {@link #makeBolts}; {@link #getEmitterIDs()} and {@link #getInfoID()}
 * delegate to that bolt and therefore fail with a
 * {@link NullPointerException} if called before {@code makeBolts}.
 *
 * <p>
 * All setters return {@code this} so that calls can be chained.
 */
public class ThetaJoinComponent extends JoinerComponent implements Component {
    private static final long serialVersionUID = 1L;
    private static final Logger LOG = Logger.getLogger(ThetaJoinComponent.class);

    /** Factor used to convert a window range into milliseconds. */
    private static final int MILLIS_PER_SECOND = 1000;

    private final Component _firstParent;
    private final Component _secondParent;
    private Component _child;

    private final String _componentName;

    private long _batchOutputMillis;

    private List<Integer> _hashIndexes;
    private List<ValueExpression> _hashExpressions;

    // Created in makeBolts; null until then.
    private StormBoltComponent _joiner;

    private final ChainOperator _chain = new ChainOperator();

    private boolean _printOut;
    private boolean _printOutSet; // whether printOut was already set

    private boolean _isContentSensitive;

    private Predicate _joinPredicate;

    private InterchangingComponent _interComp = null;
    private Type _contentSensitiveThetaJoinWrapper = null;

    // equi-weight histogram
    private boolean _isPartitioner;

    /**
     * Creates the join component and registers it as the child of both
     * parents.
     *
     * @param firstParent
     *            first input of the join; must not be {@code null}
     * @param secondParent
     *            second input of the join; must not be {@code null}
     * @param isContentSensitive
     *            flag handed over unchanged to {@link StormThetaJoin} when
     *            the bolt is built
     */
    public ThetaJoinComponent(Component firstParent, Component secondParent,
            boolean isContentSensitive) {
        _firstParent = firstParent;
        _firstParent.setChild(this);
        _secondParent = secondParent;
        _secondParent.setChild(this);
        _componentName = firstParent.getName() + "_" + secondParent.getName();
        _isContentSensitive = isContentSensitive;
    }

    /**
     * Converts a window range into milliseconds without silently overflowing.
     *
     * <p>
     * The plain {@code windowRange * 1000} is evaluated in 32-bit arithmetic
     * and wraps around for large ranges, which would yield a negative or
     * otherwise meaningless window width. Negative ranges that do not
     * overflow are passed through unchanged, as before.
     *
     * @param windowRange
     *            window range; multiplied by {@value #MILLIS_PER_SECOND} to
     *            obtain milliseconds
     * @return the range in milliseconds
     * @throws IllegalArgumentException
     *             if the result does not fit into an {@code int}
     */
    private static int toMillis(int windowRange) {
        if (windowRange > Integer.MAX_VALUE / MILLIS_PER_SECOND
                || windowRange < Integer.MIN_VALUE / MILLIS_PER_SECOND) {
            throw new IllegalArgumentException("Window range " + windowRange
                    + " is too large to be converted to milliseconds");
        }
        return windowRange * MILLIS_PER_SECOND;
    }

    /**
     * Appends an operator to this component's operator chain.
     *
     * @param operator
     *            operator to append
     * @return this component
     */
    @Override
    public ThetaJoinComponent add(Operator operator) {
        _chain.addOperator(operator);
        return this;
    }

    /**
     * Two components are considered equal when they have the same name.
     *
     * @param obj
     *            object to compare with
     * @return {@code true} if {@code obj} is a {@link Component} whose name
     *         equals this component's name
     */
    @Override
    public boolean equals(Object obj) {
        if (obj instanceof Component) {
            return _componentName.equals(((Component) obj).getName());
        } else {
            return false;
        }
    }

    /**
     * Collects the ancestor data sources of both parents.
     *
     * @return a new list holding the first parent's ancestor data sources
     *         followed by the second parent's
     */
    @Override
    public List<DataSourceComponent> getAncestorDataSources() {
        final List<DataSourceComponent> list = new ArrayList<DataSourceComponent>();
        for (final Component parent : getParents()) {
            list.addAll(parent.getAncestorDataSources());
        }
        return list;
    }

    /** @return the value last passed to {@link #setBatchOutputMillis} */
    @Override
    public long getBatchOutputMillis() {
        return _batchOutputMillis;
    }

    /** @return the operator chain of this component */
    @Override
    public ChainOperator getChainOperator() {
        return _chain;
    }

    /** @return the child set via {@link #setChild}, or {@code null} */
    @Override
    public Component getChild() {
        return _child;
    }

    /**
     * From the StormEmitter interface. Delegates to the bolt created in
     * {@link #makeBolts}, so it must not be called earlier.
     *
     * @return the emitter IDs reported by the underlying bolt
     */
    @Override
    public String[] getEmitterIDs() {
        return _joiner.getEmitterIDs();
    }

    /**
     * Not supported by this component.
     *
     * @throws RuntimeException
     *             always
     */
    @Override
    public List<String> getFullHashList() {
        throw new RuntimeException("Load balancing for Theta join is done inherently!");
    }

    /** @return the value last passed to {@link #setHashExpressions} */
    @Override
    public List<ValueExpression> getHashExpressions() {
        return _hashExpressions;
    }

    /** @return the value last passed to {@link #setOutputPartKey(List)} */
    @Override
    public List<Integer> getHashIndexes() {
        return _hashIndexes;
    }

    /**
     * Delegates to the bolt created in {@link #makeBolts}, so it must not be
     * called earlier.
     *
     * @return the info ID reported by the underlying bolt
     */
    @Override
    public String getInfoID() {
        return _joiner.getInfoID();
    }

    /** @return the join predicate, or {@code null} if none was set */
    public Predicate getJoinPredicate() {
        return _joinPredicate;
    }

    /** @return the component name, {@code <firstParent>_<secondParent>} */
    @Override
    public String getName() {
        return _componentName;
    }

    /** @return a new two-element array: first parent, then second parent */
    @Override
    public Component[] getParents() {
        return new Component[] { _firstParent, _secondParent };
    }

    /** @return the current print-out flag */
    @Override
    public boolean getPrintOut() {
        return _printOut;
    }

    /** Hash code derived from the component name only, matching equals. */
    @Override
    public int hashCode() {
        int hash = 7;
        hash = 37 * hash + (_componentName != null ? _componentName.hashCode() : 0);
        return hash;
    }

    /**
     * Creates the bolt that executes this join.
     *
     * <p>
     * A {@link StormThetaJoinBDB} is created when BDB is enabled in
     * {@code conf} and this is the final component of the plan; otherwise a
     * {@link StormThetaJoin} is created. If a sliding or tumbling window was
     * configured, it is forwarded to the new bolt.
     *
     * @param builder
     *            topology builder passed on to the bolt
     * @param killer
     *            topology killer passed on to the bolt
     * @param allCompNames
     *            component names passed on to the bolt
     * @param conf
     *            configuration; consulted for the BDB and batch-output checks
     * @param hierarchyPosition
     *            position of this component in the plan
     * @throws RuntimeException
     *             if BDB is enabled but no join predicate was provided
     */
    @Override
    public void makeBolts(TopologyBuilder builder, TopologyKiller killer,
            List<String> allCompNames, Config conf, int hierarchyPosition) {
        // By default print out for the last component; for other
        // conditions, it can be set via setPrintOut.
        if (hierarchyPosition == StormComponent.FINAL_COMPONENT && !_printOutSet) {
            setPrintOut(true);
        }

        MyUtilities.checkBatchOutput(_batchOutputMillis, _chain.getAggregation(), conf);

        final boolean isBDB = MyUtilities.isBDB(conf);
        if (isBDB && _joinPredicate == null) {
            throw new RuntimeException("Please provide _joinPredicate if you want to run BDB!");
        }

        if (isBDB && (hierarchyPosition == StormComponent.FINAL_COMPONENT)) {
            _joiner = new StormThetaJoinBDB(_firstParent, _secondParent, this,
                    allCompNames, _joinPredicate, hierarchyPosition, builder,
                    killer, conf, _interComp);
        } else {
            _joiner = new StormThetaJoin(_firstParent, _secondParent, this,
                    allCompNames, _joinPredicate, _isPartitioner,
                    hierarchyPosition, builder, killer, conf, _interComp,
                    _isContentSensitive, _contentSensitiveThetaJoinWrapper);
        }

        if (_windowSize > 0 || _tumblingWindowSize > 0) {
            _joiner.setWindowSemantics(_windowSize, _tumblingWindowSize);
        }
    }

    /**
     * @param millis
     *            batch output interval, validated later in {@link #makeBolts}
     * @return this component
     */
    @Override
    public ThetaJoinComponent setBatchOutputMillis(long millis) {
        _batchOutputMillis = millis;
        return this;
    }

    /**
     * @param child
     *            the component consuming this component's output
     */
    @Override
    public void setChild(Component child) {
        _child = child;
    }

    /**
     * @param wrapper
     *            type handed over to {@link StormThetaJoin} when the bolt is
     *            built
     * @return this component
     */
    @Override
    public ThetaJoinComponent setContentSensitiveThetaJoinWrapper(Type wrapper) {
        _contentSensitiveThetaJoinWrapper = wrapper;
        return this;
    }

    /**
     * Not supported by this component. (The full hash list is a list of
     * distinct keys, used for direct stream grouping and load-balancing.)
     *
     * @param fullHashList
     *            ignored
     * @throws RuntimeException
     *             always
     */
    @Override
    public ThetaJoinComponent setFullHashList(List<String> fullHashList) {
        throw new RuntimeException("Load balancing for Theta join is done inherently!");
    }

    /**
     * @param hashExpressions
     *            hash expressions to store
     * @return this component
     */
    @Override
    public ThetaJoinComponent setHashExpressions(List<ValueExpression> hashExpressions) {
        _hashExpressions = hashExpressions;
        return this;
    }

    /**
     * @param inter
     *            interchanging component handed over to the bolt when it is
     *            built
     * @return this component
     */
    @Override
    public ThetaJoinComponent setInterComp(InterchangingComponent inter) {
        _interComp = inter;
        return this;
    }

    /**
     * @param joinPredicate
     *            predicate handed over to the bolt; required when BDB is
     *            enabled
     * @return this component
     */
    @Override
    public ThetaJoinComponent setJoinPredicate(Predicate joinPredicate) {
        _joinPredicate = joinPredicate;
        return this;
    }

    /**
     * Varargs convenience overload of {@link #setOutputPartKey(List)}.
     *
     * @param hashIndexes
     *            hash indexes; boxed into a fixed-size list
     * @return this component
     */
    @Override
    public ThetaJoinComponent setOutputPartKey(int... hashIndexes) {
        return setOutputPartKey(Arrays.asList(ArrayUtils.toObject(hashIndexes)));
    }

    /**
     * @param hashIndexes
     *            hash indexes to store (kept by reference, not copied)
     * @return this component
     */
    @Override
    public ThetaJoinComponent setOutputPartKey(List<Integer> hashIndexes) {
        _hashIndexes = hashIndexes;
        return this;
    }

    /**
     * @param isPartitioner
     *            flag handed over to {@link StormThetaJoin} when the bolt is
     *            built
     * @return this component
     */
    public ThetaJoinComponent setPartitioner(boolean isPartitioner) {
        _isPartitioner = isPartitioner;
        return this;
    }

    /**
     * Sets the print-out flag and records that it was set explicitly, which
     * stops {@link #makeBolts} from applying its default for the final
     * component.
     *
     * @param printOut
     *            new print-out flag
     * @return this component
     */
    @Override
    public ThetaJoinComponent setPrintOut(boolean printOut) {
        _printOutSet = true;
        _printOut = printOut;
        return this;
    }

    /**
     * Enables window semantics globally and sets the sliding window width.
     *
     * @param windowRange
     *            window range; stored multiplied by 1000, i.e. as the width
     *            in milliseconds (the original note states that the default
     *            of -1 means full history)
     * @return this component
     * @throws IllegalArgumentException
     *             if the range is too large to be expressed in milliseconds
     */
    @Override
    public Component setSlidingWindow(int windowRange) {
        // Validate first so that a rejected range leaves the global flag
        // untouched.
        final int windowMillis = toMillis(windowRange);
        WindowSemanticsManager._IS_WINDOW_SEMANTICS = true;
        _windowSize = windowMillis;
        return this;
    }

    /**
     * Enables window semantics globally and sets the tumbling window width.
     *
     * @param windowRange
     *            window range; stored multiplied by 1000, i.e. as the width
     *            in milliseconds
     * @return this component
     * @throws IllegalArgumentException
     *             if the range is too large to be expressed in milliseconds
     */
    @Override
    public Component setTumblingWindow(int windowRange) {
        // Validate first so that a rejected range leaves the global flag
        // untouched.
        final int windowMillis = toMillis(windowRange);
        WindowSemanticsManager._IS_WINDOW_SEMANTICS = true;
        _tumblingWindowSize = windowMillis;
        return this;
    }
}