// Java tutorial
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.

package org.apache.impala.analysis;

import java.util.ArrayList;
import java.util.List;
import java.util.ListIterator;
import java.util.Set;

import org.apache.impala.catalog.Type;
import org.apache.impala.common.AnalysisException;
import org.apache.impala.planner.DataSink;
import org.apache.impala.planner.PlanRootSink;

import com.google.common.base.Preconditions;
import com.google.common.base.Predicates;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;

/**
 * Abstract base class for statements that produce their output through a list of
 * result expressions, e.g. a SelectStmt or a UnionStmt.
 *
 * Besides the result expressions, this class keeps a substitution map that is used to
 * replace expressions from ORDER BY or GROUP BY clauses with the corresponding result
 * expressions. It hosts the members and methods shared by its subclasses as well as
 * part of the analysis code, notably the analysis of the ORDER BY and LIMIT clauses.
 */
public abstract class QueryStmt extends StatementBase {
  /////////////////////////////////////////
  // BEGIN: Members that need to be reset()

  protected WithClause withClause_;

  protected ArrayList<OrderByElement> orderByElements_;
  protected LimitElement limitElement_;

  // For a select statement:
  // original list of exprs in select clause (star-expanded, ordinals and
  // aliases substituted, agg output substituted)
  // For a union statement:
  // list of slotrefs into the tuple materialized by the union.
  protected ArrayList<Expr> resultExprs_ = Lists.newArrayList();

  // For a select statement: select list exprs resolved to base tbl refs
  // For a union statement: same as resultExprs
  protected ArrayList<Expr> baseTblResultExprs_ = Lists.newArrayList();

  /**
   * Substitution map that replaces aliases appearing in "order by" or "group by"
   * clauses with the result expr they stand for.
   */
  protected final ExprSubstitutionMap aliasSmap_;

  /**
   * Aliases of select list items are not required to be unique. This list holds all
   * aliases that are used more than once. For example, in
   *   select int_col a, string_col a from alltypessmall;
   * both columns share the alias "a".
   */
  protected final ArrayList<Expr> ambiguousAliasList_;

  protected SortInfo sortInfo_;

  // True if there is an order by clause that must actually be evaluated.
  // False for nested query stmts whose order-by clause has neither offset nor limit.
  // Even then sortInfo_ is generated and used during analysis to make sure that the
  // order-by clause is well-formed.
  protected boolean evaluateOrderBy_;

  /////////////////////////////////////////
  // END: Members that need to be reset()

  /**
   * Creates a query stmt with the given ORDER BY elements (may be null) and limit
   * element. A null limit element is replaced by one without limit and offset exprs.
   */
  QueryStmt(ArrayList<OrderByElement> orderByElements, LimitElement limitElement) {
    aliasSmap_ = new ExprSubstitutionMap();
    ambiguousAliasList_ = Lists.newArrayList();
    orderByElements_ = orderByElements;
    sortInfo_ = null;
    if (limitElement == null) {
      limitElement_ = new LimitElement(null, null);
    } else {
      limitElement_ = limitElement;
    }
  }

  /**
   * Analyzes the parts common to all query stmts: the LIMIT/OFFSET clause and, if
   * present, the WITH clause. Does nothing if this stmt has already been analyzed.
   */
  @Override
  public void analyze(Analyzer analyzer) throws AnalysisException {
    if (isAnalyzed()) {
      return;
    }
    super.analyze(analyzer);
    analyzeLimit(analyzer);
    if (hasWithClause()) {
      withClause_.analyze(analyzer);
    }
  }

  /**
   * Returns a list containing all the materialized tuple ids that this stmt is
   * correlated with (i.e., those tuple ids from outer query blocks that TableRefs
   * inside this stmt are rooted at).
   *
   * Throws if this stmt contains an illegal mix of un/correlated table refs.
   * A statement is illegal if it contains a TableRef correlated with a parent query
   * block as well as a table ref with an absolute path (e.g. a BaseTableRef). Such a
   * statement would generate a Subplan containing a base table scan (very expensive),
   * and should therefore be avoided.
   *
   * In other words, the following cases are legal:
   * (1) only uncorrelated table refs
   * (2) only correlated table refs
   * (3) a mix of correlated table refs and table refs rooted at those refs
   *     (the statement is 'self-contained' with respect to correlation)
   */
  public List<TupleId> getCorrelatedTupleIds(Analyzer analyzer)
      throws AnalysisException {
    // Tuple ids from outer blocks that this stmt turns out to be correlated with.
    List<TupleId> outerTupleIds = Lists.newArrayList();
    // The first correlated and the first absolute table ref encountered. They are only
    // needed for detecting and reporting an illegal mix; picking the first ones is the
    // simplest choice, any other pair would be equally valid.
    TableRef firstCorrelatedRef = null;
    TableRef firstAbsoluteRef = null;
    // Materialized tuple ids of the table refs visited so far.
    Set<TupleId> visitedIds = Sets.newHashSet();

    List<TableRef> allRefs = Lists.newArrayList();
    collectTableRefs(allRefs);
    for (TableRef ref : allRefs) {
      if (firstAbsoluteRef == null && !ref.isRelative()) {
        firstAbsoluteRef = ref;
      }
      if (ref.isCorrelated()) {
        // A correlated ref that is rooted at a tuple descriptor from within this stmt
        // keeps the correlation contained in this stmt and therefore does not conflict
        // with absolute refs.
        CollectionTableRef collectionRef = (CollectionTableRef) ref;
        Preconditions.checkState(collectionRef.getResolvedPath().isRootedAtTuple());
        TupleId rootId = collectionRef.getResolvedPath().getRootDesc().getId();
        // This lookup relies on the refs being visited in depth-first order.
        if (!visitedIds.contains(rootId)) {
          if (firstCorrelatedRef == null) {
            firstCorrelatedRef = ref;
          }
          outerTupleIds.add(rootId);
        }
      }
      if (firstCorrelatedRef != null && firstAbsoluteRef != null) {
        throw new AnalysisException(String.format(
            "Nested query is illegal because it contains a table reference '%s' " +
            "correlated with an outer block as well as an uncorrelated one '%s':\n%s",
            firstCorrelatedRef.tableRefToSql(), firstAbsoluteRef.tableRefToSql(),
            toSql()));
      }
      visitedIds.add(ref.getId());
    }
    return outerTupleIds;
  }

  /**
   * Analyzes the limit element. Throws if an OFFSET was given although this stmt has
   * no ORDER BY clause.
   */
  private void analyzeLimit(Analyzer analyzer) throws AnalysisException {
    boolean offsetWithoutOrderBy =
        limitElement_.getOffsetExpr() != null && !hasOrderByClause();
    if (offsetWithoutOrderBy) {
      throw new AnalysisException(
          "OFFSET requires an ORDER BY clause: " + limitElement_.toSql().trim());
    }
    limitElement_.analyze(analyzer);
  }

  /**
   * Creates sortInfo_ after resolving aliases and ordinals in the ordering exprs.
   * In a nested query stmt (one that is not analyzed by the root analyzer, e.g. an
   * inline view or union operand) an order-by with an offset but without a limit is
   * rejected, since that requires a sort and merging-exchange, and subsequent query
   * execution would occur on a single machine.
   * Sets evaluateOrderBy_ to false and registers a warning if the order-by has neither
   * limit nor offset in a nested query stmt, because such an order-by is ignored.
   * Throws if an ordering expr contains a subquery.
   */
  protected void createSortInfo(Analyzer analyzer) throws AnalysisException {
    // Nothing to sort by.
    if (orderByElements_ == null) {
      evaluateOrderBy_ = false;
      return;
    }

    ArrayList<Expr> sortExprs = Lists.newArrayList();
    ArrayList<Boolean> ascFlags = Lists.newArrayList();
    ArrayList<Boolean> nullsFirstFlags = Lists.newArrayList();

    // Pull the exprs and their ordering properties out of the order-by elements.
    for (OrderByElement element : orderByElements_) {
      if (element.getExpr().contains(Predicates.instanceOf(Subquery.class))) {
        throw new AnalysisException(
            "Subqueries are not supported in the ORDER BY clause.");
      }
      // Work on copies so that the original parse node stays untouched in case it
      // needs to be printed later.
      sortExprs.add(element.getExpr().clone());
      ascFlags.add(Boolean.valueOf(element.isAsc()));
      nullsFirstFlags.add(element.getNullsFirstParam());
    }
    substituteOrdinalsAliases(sortExprs, "ORDER BY", analyzer);

    if (!analyzer.isRootAnalyzer() && hasOffset() && !hasLimit()) {
      throw new AnalysisException("Order-by with offset without limit not supported" +
          " in nested queries.");
    }

    sortInfo_ = new SortInfo(sortExprs, ascFlags, nullsFirstFlags);

    // An order by without limit and offset is ignored in inline views, union operands
    // and insert statements.
    boolean isIgnored = !hasLimit() && !hasOffset() && !analyzer.isRootAnalyzer();
    if (!isIgnored) {
      evaluateOrderBy_ = true;
      return;
    }
    evaluateOrderBy_ = false;

    // Tell the user that the order by was ignored.
    StringBuilder warning = new StringBuilder();
    warning.append("Ignoring ORDER BY clause without LIMIT or OFFSET: ")
        .append("ORDER BY ")
        .append(orderByElements_.get(0).toSql());
    for (OrderByElement element :
        orderByElements_.subList(1, orderByElements_.size())) {
      warning.append(", ").append(element.toSql());
    }
    warning.append(".\nAn ORDER BY appearing in a view, subquery, union operand, ")
        .append("or an insert/ctas statement has no effect on the query result ")
        .append("unless a LIMIT and/or OFFSET is used in conjunction ")
        .append("with the ORDER BY.");
    analyzer.addWarning(warning.toString());
  }

  /**
   * Create a tuple descriptor for the single tuple that is materialized, sorted and
   * output by the exec node implementing the sort. Done by materializing slot refs in
   * the order-by and result expressions. Those SlotRefs in the ordering and result
   * exprs are substituted with SlotRefs into the new tuple. This simplifies sorting
   * logic for total (no limit) sorts.
   * Done after analyzeAggregation() since ordering and result exprs may refer to the
   * outputs of aggregation.
   * Must only be called if evaluateOrderBy_ is set. Throws if an ordering expr has a
   * complex type.
   */
  protected void createSortTupleInfo(Analyzer analyzer) throws AnalysisException {
    Preconditions.checkState(evaluateOrderBy_);

    for (Expr sortExpr : sortInfo_.getOrderingExprs()) {
      if (!sortExpr.getType().isComplexType()) {
        continue;
      }
      throw new AnalysisException(String.format("ORDER BY expression '%s' with " +
          "complex type '%s' is not supported.", sortExpr.toSql(),
          sortExpr.getType().toSql()));
    }

    ExprSubstitutionMap sortSmap = sortInfo_.createSortTupleInfo(resultExprs_, analyzer);

    for (int idx = 0; idx < sortSmap.size(); ++idx) {
      // Both sides of every substitution are expected to be slot refs.
      Preconditions.checkState(sortSmap.getLhs().get(idx) instanceof SlotRef);
      Preconditions.checkState(sortSmap.getRhs().get(idx) instanceof SlotRef);
      SlotRef srcSlotRef = (SlotRef) sortSmap.getLhs().get(idx);
      SlotRef dstSlotRef = (SlotRef) sortSmap.getRhs().get(idx);
      if (hasLimit()) {
        analyzer.registerValueTransfer(srcSlotRef.getSlotId(), dstSlotRef.getSlotId());
      } else {
        analyzer.createAuxEquivPredicate(dstSlotRef, srcSlotRef);
      }
    }

    substituteResultExprs(sortSmap, analyzer);
  }

  /**
   * Returns the first element of exprs that is a non-unique alias, or null if exprs
   * contains no ambiguous alias.
   */
  protected Expr getFirstAmbiguousAlias(List<Expr> exprs) {
    for (Expr candidate : exprs) {
      if (!ambiguousAliasList_.contains(candidate)) {
        continue;
      }
      return candidate;
    }
    return null;
  }

  /**
   * Replaces exprs of the form "<number>" with the corresponding result expr, and
   * substitutes alias references in all other exprs using aliasSmap_.
   * Modifies the exprs list in-place. Throws if exprs contains an ambiguous alias.
   */
  protected void substituteOrdinalsAliases(List<Expr> exprs, String errorPrefix,
      Analyzer analyzer) throws AnalysisException {
    Expr ambiguous = getFirstAmbiguousAlias(exprs);
    if (ambiguous != null) {
      throw new AnalysisException("Column '" + ambiguous.toSql() +
          "' in " + errorPrefix + " clause is ambiguous");
    }

    for (ListIterator<Expr> it = exprs.listIterator(); it.hasNext();) {
      Expr current = it.next();
      // An expr is substituted either by ordinal or by alias, never both: an expr
      // obtained through an ordinal was copied from the select clause context, where
      // alias substitution is not performed in the same way, so its aliases must not
      // be replaced.
      Expr replacement = trySubstituteOrdinal(current, errorPrefix, analyzer);
      if (replacement == null) {
        replacement = current.trySubstitute(aliasSmap_, analyzer, false);
      }
      it.set(replacement);
    }
  }

  // Attempts to replace an expr of the form "<number>" with a copy of the select list
  // item at that (1-based) position. Returns null if expr is not an integer-typed
  // numeric literal. Throws if the ordinal is smaller than 1 or exceeds the number of
  // select list items.
  private Expr trySubstituteOrdinal(Expr expr, String errorPrefix,
      Analyzer analyzer) throws AnalysisException {
    if (!(expr instanceof NumericLiteral)) {
      return null;
    }
    expr.analyze(analyzer);
    if (!expr.getType().isIntegerType()) {
      return null;
    }
    NumericLiteral literal = (NumericLiteral) expr;
    long ordinal = literal.getLongValue();
    if (ordinal < 1) {
      throw new AnalysisException(
          errorPrefix + ": ordinal must be >= 1: " + expr.toSql());
    }
    if (ordinal > resultExprs_.size()) {
      throw new AnalysisException(
          errorPrefix + ": ordinal exceeds number of items in select list: "
          + expr.toSql());
    }
    // Hand out a copy to protect against accidentally shared state.
    int index = (int) ordinal - 1;
    return resultExprs_.get(index).clone();
  }

  /**
   * Returns the materialized tuple ids of the output of this stmt.
   * Used in case this stmt is part of an @InlineViewRef,
   * since we need to know the materialized tuple ids of a TableRef.
   * This call must be idempotent because it may be called more than once for Union
   * stmt.
   * TODO: The name of this function has become outdated due to analytics
   * producing logical (non-materialized) tuples. Re-think and clean up.
   */
  public abstract void getMaterializedTupleIds(ArrayList<TupleId> tupleIdList);

  /**
   * Returns all physical (non-inline-view) TableRefs of this statement and the nested
   * statements of inline views. The returned TableRefs are in depth-first order.
   */
  public abstract void collectTableRefs(List<TableRef> tblRefs);

  @Override
  public List<Expr> getResultExprs() {
    return resultExprs_;
  }

  public void setWithClause(WithClause withClause) {
    withClause_ = withClause;
  }

  public boolean hasWithClause() {
    return withClause_ != null;
  }

  public WithClause getWithClause() {
    return withClause_;
  }

  public boolean hasOrderByClause() {
    return orderByElements_ != null;
  }

  public boolean hasLimit() {
    return limitElement_.getLimitExpr() != null;
  }

  public long getLimit() {
    return limitElement_.getLimit();
  }

  public boolean hasOffset() {
    return limitElement_.getOffsetExpr() != null;
  }

  public long getOffset() {
    return limitElement_.getOffset();
  }

  public SortInfo getSortInfo() {
    return sortInfo_;
  }

  public boolean evaluateOrderBy() {
    return evaluateOrderBy_;
  }

  public ArrayList<Expr> getBaseTblResultExprs() {
    return baseTblResultExprs_;
  }

  /**
   * Installs a new limit element whose limit is the given non-negative value, or the
   * existing limit if this stmt already has a smaller one. The new limit element is
   * created with a BIGINT literal as its first argument and null as its second.
   */
  public void setLimit(long limit) throws AnalysisException {
    Preconditions.checkState(limit >= 0);
    long effectiveLimit = limit;
    if (hasLimit()) {
      effectiveLimit = Math.min(limit, getLimit());
    }
    NumericLiteral limitLiteral =
        new NumericLiteral(Long.toString(effectiveLimit), Type.BIGINT);
    limitElement_ = new LimitElement(limitLiteral, null);
  }

  /**
   * Mark all slots that need to be materialized for the execution of this stmt.
   * This excludes slots referenced in resultExprs (it depends on the consumer of
   * the output of the stmt whether they'll be accessed) and single-table predicates
   * (the PlanNode that materializes that tuple can decide whether evaluating those
   * predicates requires slot materialization).
   * This is called prior to plan tree generation and allows tuple-materializing
   * PlanNodes to compute their tuple's mem layout.
   */
  public abstract void materializeRequiredSlots(Analyzer analyzer);

  /**
   * Marks the slots referenced in exprs as materialized in the analyzer's descriptor
   * table.
   */
  protected void materializeSlots(Analyzer analyzer, List<Expr> exprs) {
    List<SlotId> referencedSlotIds = Lists.newArrayList();
    for (Expr expr : exprs) {
      expr.getIds(null, referencedSlotIds);
    }
    analyzer.getDescTbl().markSlotsMaterialized(referencedSlotIds);
  }

  /**
   * Substitutes the result expressions with smap. Preserves the original types of
   * those expressions during the substitution.
   */
  public void substituteResultExprs(ExprSubstitutionMap smap, Analyzer analyzer) {
    resultExprs_ = Expr.substituteList(resultExprs_, smap, analyzer, true);
  }

  /**
   * Returns a new PlanRootSink as the data sink of this stmt.
   */
  public DataSink createDataSink() {
    return new PlanRootSink();
  }

  /**
   * Returns a list with clones of all ORDER BY elements, or null if this stmt has no
   * ORDER BY clause.
   */
  public ArrayList<OrderByElement> cloneOrderByElements() {
    if (orderByElements_ == null) {
      return null;
    }
    ArrayList<OrderByElement> copies =
        Lists.newArrayListWithCapacity(orderByElements_.size());
    for (OrderByElement element : orderByElements_) {
      copies.add(element.clone());
    }
    return copies;
  }

  /**
   * Returns a clone of the WITH clause, or null if this stmt has none.
   */
  public WithClause cloneWithClause() {
    if (withClause_ == null) {
      return null;
    }
    return withClause_.clone();
  }

  /**
   * C'tor for cloning.
   */
  protected QueryStmt(QueryStmt other) {
    super(other);
    withClause_ = other.cloneWithClause();
    orderByElements_ = other.cloneOrderByElements();
    limitElement_ = other.limitElement_.clone();
    resultExprs_ = Expr.cloneList(other.resultExprs_);
    baseTblResultExprs_ = Expr.cloneList(other.baseTblResultExprs_);
    aliasSmap_ = other.aliasSmap_.clone();
    ambiguousAliasList_ = Expr.cloneList(other.ambiguousAliasList_);
    if (other.sortInfo_ == null) {
      sortInfo_ = null;
    } else {
      sortInfo_ = other.sortInfo_.clone();
    }
    analyzer_ = other.analyzer_;
    evaluateOrderBy_ = other.evaluateOrderBy_;
  }

  /**
   * Resets the analysis state of this stmt: resets the ORDER BY exprs and the limit
   * element, clears the result exprs and alias bookkeeping, and drops the sort info.
   */
  @Override
  public void reset() {
    super.reset();
    if (orderByElements_ != null) {
      for (OrderByElement element : orderByElements_) {
        element.getExpr().reset();
      }
    }
    limitElement_.reset();
    resultExprs_.clear();
    baseTblResultExprs_.clear();
    aliasSmap_.clear();
    ambiguousAliasList_.clear();
    sortInfo_ = null;
    evaluateOrderBy_ = false;
  }

  @Override
  public abstract QueryStmt clone();
}