// PlanFragment.java - part of the Apache Impala query planner (see class comment below).
// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, // software distributed under the License is distributed on an // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. package org.apache.impala.planner; import java.util.List; import java.util.Set; import org.apache.impala.analysis.Analyzer; import org.apache.impala.analysis.Expr; import org.apache.impala.analysis.TupleId; import org.apache.impala.common.AnalysisException; import org.apache.impala.common.InternalException; import org.apache.impala.common.NotImplementedException; import org.apache.impala.common.TreeNode; import org.apache.impala.thrift.TExplainLevel; import org.apache.impala.thrift.TPartitionType; import org.apache.impala.thrift.TPlanFragment; import org.apache.impala.thrift.TPlanFragmentTree; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.google.common.base.Preconditions; import com.google.common.base.Predicates; import com.google.common.collect.Lists; import com.google.common.collect.Sets; /** * PlanFragments form a tree structure via their ExchangeNodes. A tree of fragments * connected in that way forms a plan. The output of a plan is produced by the root * fragment and is either the result of the query or an intermediate result * needed by a different plan (such as a hash table). 
 *
 * Plans are grouped into cohorts based on the consumer of their output: all
 * plans that materialize intermediate results for a particular consumer plan
 * are grouped into a single cohort.
 *
 * A PlanFragment encapsulates the specific tree of execution nodes that
 * are used to produce the output of the plan fragment, as well as output exprs,
 * destination node, etc. If there are no output exprs, the full row that is
 * produced by the plan root is marked as materialized.
 *
 * A plan fragment can have one or many instances, each of which in turn is executed by
 * an individual node and the output sent to a specific instance of the destination
 * fragment (or, in the case of the root fragment, is materialized in some form).
 *
 * A hash-partitioned plan fragment is the result of one or more hash-partitioning data
 * streams being received by plan nodes in this fragment. In the future, a fragment's
 * data partition could also be hash partitioned based on a scan node that is reading
 * from a physically hash-partitioned table.
 *
 * The sequence of calls is:
 * - c'tor
 * - assemble with getters, etc.
* - finalize()
 * - toThrift()
 *
 * TODO: the tree of PlanNodes is connected across fragment boundaries, which makes
 * it impossible to search for things within a fragment (using TreeNode functions);
 * fix that
 */
public class PlanFragment extends TreeNode<PlanFragment> {
  // id of this fragment, assigned at construction
  private final PlanFragmentId fragmentId_;
  // id of the plan this fragment belongs to; set via setPlanId()
  private PlanId planId_;
  // id of this plan's cohort; set via setCohortId()
  private CohortId cohortId_;

  // root of plan tree executed by this fragment
  private PlanNode planRoot_;

  // exchange node to which this fragment sends its output
  private ExchangeNode destNode_;

  // if null, outputs the entire row produced by planRoot_
  private List<Expr> outputExprs_;

  // created in finalize() or set in setSink()
  private DataSink sink_;

  // specification of the partition of the input of this fragment;
  // an UNPARTITIONED fragment is executed on only a single node
  // TODO: improve this comment, "input" is a bit misleading
  private DataPartition dataPartition_;

  // specification of how the output of this fragment is partitioned (i.e., how
  // it's sent to its destination);
  // if the output is UNPARTITIONED, it is being broadcast
  private DataPartition outputPartition_;

  /**
   * C'tor for fragment with specific partition; the output is by default broadcast.
   */
  public PlanFragment(PlanFragmentId id, PlanNode root, DataPartition partition) {
    fragmentId_ = id;
    planRoot_ = root;
    dataPartition_ = partition;
    outputPartition_ = DataPartition.UNPARTITIONED;
    // claim ownership of every node in the exec tree rooted at 'root'
    setFragmentInPlanTree(planRoot_);
  }

  /**
   * Assigns 'this' as fragment of all PlanNodes in the plan tree rooted at node.
   * Does not traverse the children of ExchangeNodes because those must belong to a
   * different fragment.
   */
  public void setFragmentInPlanTree(PlanNode node) {
    if (node == null) return;
    node.setFragment(this);
    // an ExchangeNode's children are the roots of other fragments' exec trees
    if (node instanceof ExchangeNode) return;
    for (PlanNode child : node.getChildren()) setFragmentInPlanTree(child);
  }

  /**
   * Collect all PlanNodes that belong to the exec tree of this fragment.
*/
  public void collectPlanNodes(List<PlanNode> nodes) {
    Preconditions.checkNotNull(nodes);
    collectPlanNodesHelper(planRoot_, nodes);
  }

  // Recursively adds 'root' and its exec-tree descendants to 'nodes'; stops at
  // ExchangeNodes because their children belong to other fragments.
  private void collectPlanNodesHelper(PlanNode root, List<PlanNode> nodes) {
    if (root == null) return;
    nodes.add(root);
    if (root instanceof ExchangeNode) return;
    for (PlanNode child : root.getChildren()) collectPlanNodesHelper(child, nodes);
  }

  // Stores a clone of 'outputExprs' so later caller-side changes are not visible here.
  public void setOutputExprs(List<Expr> outputExprs) {
    outputExprs_ = Expr.cloneList(outputExprs);
  }

  public List<Expr> getOutputExprs() { return outputExprs_; }

  /**
   * Finalize plan tree and create stream sink, if needed.
   * If this fragment is hash partitioned, ensures that the corresponding partition
   * exprs of all hash-partitioning senders are cast to identical types.
   * Otherwise, the hashes generated for identical partition values may differ
   * among senders if the partition-expr types are not identical.
   */
  public void finalize(Analyzer analyzer)
      throws InternalException, NotImplementedException {
    if (destNode_ != null) {
      Preconditions.checkState(sink_ == null);
      // we're streaming to an exchange node
      DataStreamSink streamSink = new DataStreamSink(destNode_, outputPartition_);
      streamSink.setFragment(this);
      sink_ = streamSink;
    }

    if (!dataPartition_.isHashPartitioned()) return;

    // This fragment is hash partitioned. Gather all exchange nodes and ensure
    // that all hash-partitioning senders hash on exprs-values of the same type.
    List<ExchangeNode> exchNodes = Lists.newArrayList();
    planRoot_.collect(Predicates.instanceOf(ExchangeNode.class), exchNodes);

    // Contains partition-expr lists of all hash-partitioning sender fragments.
    List<List<Expr>> senderPartitionExprs = Lists.newArrayList();
    for (ExchangeNode exchNode : exchNodes) {
      Preconditions.checkState(!exchNode.getChildren().isEmpty());
      // an exchange node's child is the root of a sender fragment's exec tree
      PlanFragment senderFragment = exchNode.getChild(0).getFragment();
      Preconditions.checkNotNull(senderFragment);
      if (!senderFragment.getOutputPartition().isHashPartitioned()) continue;
      List<Expr> partExprs = senderFragment.getOutputPartition().getPartitionExprs();
      // All hash-partitioning senders must have compatible partition exprs, otherwise
      // this fragment's data partition must not be hash partitioned.
      Preconditions.checkState(
          partExprs.size() == dataPartition_.getPartitionExprs().size());
      senderPartitionExprs.add(partExprs);
    }

    // Cast all corresponding hash partition exprs of all hash-partitioning senders
    // to their compatible types. Also cast the data partition's exprs for consistency,
    // although not strictly necessary. They should already be type identical to the
    // exprs of one of the senders and they are not directly used for hashing in the BE.
    senderPartitionExprs.add(dataPartition_.getPartitionExprs());
    try {
      analyzer.castToUnionCompatibleTypes(senderPartitionExprs);
    } catch (AnalysisException e) {
      // Should never happen. Analysis should have ensured type compatibility already.
      throw new IllegalStateException(e);
    }
  }

  /**
   * Return the number of nodes on which the plan fragment will execute.
   * invalid: -1
   */
  public int getNumNodes() {
    return dataPartition_ == DataPartition.UNPARTITIONED ? 1 : planRoot_.getNumNodes();
  }

  /**
   * Estimates the per-node number of distinct values of exprs based on the data
   * partition of this fragment and its number of nodes. Returns -1 for an invalid
   * estimate, e.g., because getNumDistinctValues() failed on one of the exprs.
   */
  public long getNumDistinctValues(List<Expr> exprs) {
    Preconditions.checkNotNull(dataPartition_);
    long result = 1;
    int numNodes = getNumNodes();
    Preconditions.checkState(numNodes >= 0);
    // The number of nodes is zero for empty tables.
    if (numNodes == 0) return 0;
    for (Expr expr : exprs) {
      long numDistinct = expr.getNumDistinctValues();
      if (numDistinct == -1) {
        // invalid estimate for this expr invalidates the whole product
        result = -1;
        break;
      }
      if (dataPartition_.getPartitionExprs().contains(expr)) {
        // values of a partition expr are spread across nodes: divide its NDV by the
        // node count, but never go below one distinct value per node
        numDistinct = (long) Math.max((double) numDistinct / (double) numNodes, 1L);
      }
      result = PlanNode.multiplyCardinalities(result, numDistinct);
    }
    return result;
  }

  // Serializes this fragment (plan tree, output exprs, sink, input partition) to Thrift.
  public TPlanFragment toThrift() {
    TPlanFragment result = new TPlanFragment();
    result.setDisplay_name(fragmentId_.toString());
    if (planRoot_ != null) result.setPlan(planRoot_.treeToThrift());
    if (outputExprs_ != null) {
      result.setOutput_exprs(Expr.treesToThrift(outputExprs_));
    }
    if (sink_ != null) result.setOutput_sink(sink_.toThrift());
    result.setPartition(dataPartition_.toThrift());
    return result;
  }

  // Serializes this fragment and all fragments below it (pre-order) to Thrift.
  public TPlanFragmentTree treeToThrift() {
    TPlanFragmentTree result = new TPlanFragmentTree();
    treeToThriftHelper(result);
    return result;
  }

  private void treeToThriftHelper(TPlanFragmentTree plan) {
    plan.addToFragments(toThrift());
    for (PlanFragment child : children_) {
      child.treeToThriftHelper(plan);
    }
  }

  public String getExplainString(TExplainLevel detailLevel) {
    return getExplainString("", "", detailLevel);
  }

  /**
   * The root of the output tree will be prefixed by rootPrefix and the remaining plan
   * output will be prefixed by prefix.
*/ protected final String getExplainString(String rootPrefix, String prefix, TExplainLevel detailLevel) { StringBuilder str = new StringBuilder(); Preconditions.checkState(dataPartition_ != null); String detailPrefix = prefix + "| "; // sink detail if (detailLevel == TExplainLevel.VERBOSE) { // we're printing a new tree, start over with the indentation prefix = " "; rootPrefix = " "; detailPrefix = prefix + "| "; str.append(String.format("%s:PLAN FRAGMENT [%s]\n", fragmentId_.toString(), dataPartition_.getExplainString())); if (sink_ != null && sink_ instanceof DataStreamSink) { str.append(sink_.getExplainString(rootPrefix, prefix, detailLevel) + "\n"); } } String planRootPrefix = rootPrefix; // Always print sinks other than DataStreamSinks. if (sink_ != null && !(sink_ instanceof DataStreamSink)) { str.append(sink_.getExplainString(rootPrefix, detailPrefix, detailLevel)); if (detailLevel.ordinal() >= TExplainLevel.STANDARD.ordinal()) { str.append(prefix + "|\n"); } // we already used the root prefix for the sink planRootPrefix = prefix; } if (planRoot_ != null) { str.append(planRoot_.getExplainString(planRootPrefix, prefix, detailLevel)); } return str.toString(); } /** Returns true if this fragment is partitioned. 
*/ public boolean isPartitioned() { return (dataPartition_.getType() != TPartitionType.UNPARTITIONED); } public PlanFragmentId getId() { return fragmentId_; } public PlanId getPlanId() { return planId_; } public void setPlanId(PlanId id) { planId_ = id; } public CohortId getCohortId() { return cohortId_; } public void setCohortId(CohortId id) { cohortId_ = id; } public PlanFragment getDestFragment() { if (destNode_ == null) return null; return destNode_.getFragment(); } public ExchangeNode getDestNode() { return destNode_; } public DataPartition getDataPartition() { return dataPartition_; } public void setDataPartition(DataPartition dataPartition) { this.dataPartition_ = dataPartition; } public DataPartition getOutputPartition() { return outputPartition_; } public void setOutputPartition(DataPartition outputPartition) { this.outputPartition_ = outputPartition; } public PlanNode getPlanRoot() { return planRoot_; } public void setPlanRoot(PlanNode root) { planRoot_ = root; setFragmentInPlanTree(planRoot_); } public void setDestination(ExchangeNode destNode) { destNode_ = destNode; PlanFragment dest = getDestFragment(); Preconditions.checkNotNull(dest); dest.addChild(this); } public boolean hasSink() { return sink_ != null; } public DataSink getSink() { return sink_; } public void setSink(DataSink sink) { Preconditions.checkState(this.sink_ == null); Preconditions.checkNotNull(sink); sink.setFragment(this); this.sink_ = sink; } /** * Adds a node as the new root to the plan tree. Connects the existing * root as the child of newRoot. */ public void addPlanRoot(PlanNode newRoot) { Preconditions.checkState(newRoot.getChildren().size() == 1); newRoot.setChild(0, planRoot_); planRoot_ = newRoot; planRoot_.setFragment(this); } /** * Verify that the tree of PlanFragments and their contained tree of * PlanNodes is constructed correctly. 
*/
  public void verifyTree() {
    // Every PlanNode in this fragment's exec tree must point back at this fragment.
    List<PlanNode> execNodes = Lists.newArrayList();
    collectPlanNodes(execNodes);
    List<PlanNode> exchanges = Lists.newArrayList();
    for (PlanNode execNode : execNodes) {
      if (execNode instanceof ExchangeNode) exchanges.add(execNode);
      Preconditions.checkState(execNode.getFragment() == this);
    }

    // Each ExchangeNode must correspond to exactly one registered input fragment,
    // and that fragment must send its output to the exchange.
    Preconditions.checkState(exchanges.size() == getChildren().size());
    List<PlanFragment> inputFragments = Lists.newArrayList();
    for (PlanNode exchange : exchanges) {
      PlanFragment inputFragment = exchange.getChild(0).getFragment();
      Preconditions.checkState(!inputFragments.contains(inputFragment));
      inputFragments.add(inputFragment);
      Preconditions.checkState(inputFragment.getDestNode() == exchange);
    }

    // Conversely, every registered child must have been discovered via an exchange.
    Preconditions.checkState(getChildren().containsAll(inputFragments));

    // Recurse into all input fragments.
    for (PlanFragment child : getChildren()) child.verifyTree();
  }

  /**
   * Returns true if 'exprs' reference a tuple that is made nullable in this fragment,
   * but not in any of its input fragments.
   */
  public boolean refsNullableTupleId(List<Expr> exprs) {
    Preconditions.checkNotNull(planRoot_);
    List<TupleId> referencedTids = Lists.newArrayList();
    for (Expr e : exprs) e.getIds(referencedTids, null);

    // Start with all tuple ids nullable below the plan root, then subtract those
    // that were already made nullable inside an input fragment.
    Set<TupleId> nullableTids = Sets.newHashSet(planRoot_.getNullableTupleIds());
    List<ExchangeNode> exchanges = Lists.newArrayList();
    planRoot_.collect(ExchangeNode.class, exchanges);
    for (ExchangeNode exchange : exchanges) {
      nullableTids.removeAll(exchange.getNullableTupleIds());
    }
    return referencedTids.stream().anyMatch(nullableTids::contains);
  }
}