org.apache.hadoop.hive.ql.udf.ptf.WindowingTableFunction.java Source code

Introduction

Here is the source code for org.apache.hadoop.hive.ql.udf.ptf.WindowingTableFunction.java, the partitioned table function (PTF) evaluator that Apache Hive uses to compute SQL window functions (OVER clauses) over each partition of input rows.

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hive.ql.udf.ptf;

import java.util.AbstractList;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

import org.apache.commons.lang.ArrayUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
import org.apache.hadoop.hive.ql.exec.PTFPartition;
import org.apache.hadoop.hive.ql.exec.PTFPartition.PTFPartitionIterator;
import org.apache.hadoop.hive.ql.exec.PTFRollingPartition;
import org.apache.hadoop.hive.ql.exec.WindowFunctionInfo;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.parse.WindowingSpec.BoundarySpec;
import org.apache.hadoop.hive.ql.parse.WindowingSpec.WindowType;
import org.apache.hadoop.hive.ql.plan.PTFDesc;
import org.apache.hadoop.hive.ql.plan.ptf.BoundaryDef;
import org.apache.hadoop.hive.ql.plan.ptf.PTFExpressionDef;
import org.apache.hadoop.hive.ql.plan.ptf.PartitionedTableFunctionDef;
import org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef;
import org.apache.hadoop.hive.ql.plan.ptf.WindowFunctionDef;
import org.apache.hadoop.hive.ql.plan.ptf.WindowTableFunctionDef;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.AggregationBuffer;
import org.apache.hadoop.hive.ql.udf.generic.ISupportStreamingModeForWindowing;
import org.apache.hadoop.hive.serde2.AbstractSerDe;
import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

@SuppressWarnings("deprecation")
public class WindowingTableFunction extends TableFunctionEvaluator {
    public static final Logger LOG = LoggerFactory.getLogger(WindowingTableFunction.class.getName());

    static class WindowingFunctionInfoHelper {
        private boolean supportsWindow;

        WindowingFunctionInfoHelper() {
        }

        public WindowingFunctionInfoHelper(boolean supportsWindow) {
            this.supportsWindow = supportsWindow;
        }

        public boolean isSupportsWindow() {
            return supportsWindow;
        }

        public void setSupportsWindow(boolean supportsWindow) {
            this.supportsWindow = supportsWindow;
        }
    }

    StreamingState streamingState;
    RankLimit rnkLimitDef;

    // Some information about the windowing functions needs to be initialized
    // at query compilation time and made available during the map/reduce tasks
    // via plan serialization.
    Map<String, WindowingFunctionInfoHelper> windowingFunctionHelpers = null;

    public Map<String, WindowingFunctionInfoHelper> getWindowingFunctionHelpers() {
        return windowingFunctionHelpers;
    }

    public void setWindowingFunctionHelpers(Map<String, WindowingFunctionInfoHelper> windowingFunctionHelpers) {
        this.windowingFunctionHelpers = windowingFunctionHelpers;
    }

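    /**
     * Batch-mode evaluation: each window function is computed over the fully
     * materialized input partition, then one output row is appended per input
     * row, with the function results first and the input columns after them.
     */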
    @SuppressWarnings({ "unchecked", "rawtypes" })
    @Override
    public void execute(PTFPartitionIterator<Object> pItr, PTFPartition outP) throws HiveException {
        ArrayList<List<?>> oColumns = new ArrayList<List<?>>();
        PTFPartition iPart = pItr.getPartition();
        StructObjectInspector inputOI = iPart.getOutputOI();

        WindowTableFunctionDef wTFnDef = (WindowTableFunctionDef) getTableDef();
        for (WindowFunctionDef wFn : wTFnDef.getWindowFunctions()) {
            boolean processWindow = processWindow(wFn.getWindowFrame());
            pItr.reset();
            if (!processWindow) {
                Object out = evaluateFunctionOnPartition(wFn, iPart);
                if (!wFn.isPivotResult()) {
                    out = new SameList(iPart.size(), out);
                }
                oColumns.add((List<?>) out);
            } else {
                oColumns.add(executeFnwithWindow(wFn, iPart));
            }
        }

        /*
         * Output columns in the following order:
         * - the columns representing the output of the window functions
         * - the input row's columns
         */

        for (int i = 0; i < iPart.size(); i++) {
            ArrayList oRow = new ArrayList();
            Object iRow = iPart.getAt(i);

            for (int j = 0; j < oColumns.size(); j++) {
                oRow.add(oColumns.get(j).get(i));
            }

            for (StructField f : inputOI.getAllStructFieldRefs()) {
                oRow.add(inputOI.getStructFieldData(iRow, f));
            }

            outP.append(oRow);
        }
    }

    // Evaluate the result given a partition and the row number to process
    private Object evaluateWindowFunction(WindowFunctionDef wFn, int rowToProcess, PTFPartition partition)
            throws HiveException {
        BasePartitionEvaluator partitionEval = wFn.getWFnEval().getPartitionWindowingEvaluator(wFn.getWindowFrame(),
                partition, wFn.getArgs(), wFn.getOI());
        return partitionEval.iterate(rowToProcess, ptfDesc.getLlInfo());
    }

    // Evaluate the result given a partition
    private Object evaluateFunctionOnPartition(WindowFunctionDef wFn, PTFPartition partition) throws HiveException {
        BasePartitionEvaluator partitionEval = wFn.getWFnEval().getPartitionWindowingEvaluator(wFn.getWindowFrame(),
                partition, wFn.getArgs(), wFn.getOI());
        return partitionEval.getPartitionAgg();
    }

    // Evaluate the function result for each row in the partition
    ArrayList<Object> executeFnwithWindow(WindowFunctionDef wFnDef, PTFPartition iPart) throws HiveException {
        ArrayList<Object> vals = new ArrayList<Object>();
        for (int i = 0; i < iPart.size(); i++) {
            Object out = evaluateWindowFunction(wFnDef, i, iPart);
            vals.add(out);
        }
        return vals;
    }

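    /*
     * A window frame needs per-row evaluation only if at least one boundary is
     * bounded; a frame that is UNBOUNDED PRECEDING to UNBOUNDED FOLLOWING (or
     * no frame at all) covers the whole partition, so the function can be
     * evaluated once for all rows.
     */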
    private static boolean processWindow(WindowFrameDef frame) {
        if (frame == null) {
            return false;
        }
        if (frame.getStart().getAmt() == BoundarySpec.UNBOUNDED_AMOUNT
                && frame.getEnd().getAmt() == BoundarySpec.UNBOUNDED_AMOUNT) {
            return false;
        }
        return true;
    }

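    /*
     * Decides whether a single window function can be evaluated in streaming
     * mode. Functions that don't support windows always can; RANGE frames,
     * unbounded-following frames, and ROWS frames wider than the configured
     * cache size cannot; an unbounded-preceding start is left to the function
     * itself to handle.
     */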
    private boolean streamingPossible(Configuration cfg, WindowFunctionDef wFnDef) throws HiveException {
        WindowFrameDef wdwFrame = wFnDef.getWindowFrame();

        WindowingFunctionInfoHelper wFnInfo = getWindowingFunctionInfoHelper(wFnDef.getName());
        if (!wFnInfo.isSupportsWindow()) {
            return true;
        }

        BoundaryDef start = wdwFrame.getStart();
        BoundaryDef end = wdwFrame.getEnd();

        /*
         * Currently we do not handle the dynamically sized windows implied by
         * RANGE-based frames.
         */
        if (wdwFrame.getWindowType() == WindowType.RANGE) {
            return false;
        }

        /*
         * Windows that are unbounded following don't benefit from Streaming.
         */
        if (end.getAmt() == BoundarySpec.UNBOUNDED_AMOUNT) {
            return false;
        }

        /*
         * Let the function decide if it can handle this special case.
         */
        if (start.getAmt() == BoundarySpec.UNBOUNDED_AMOUNT) {
            return true;
        }

        int windowLimit = HiveConf.getIntVar(cfg, ConfVars.HIVEJOINCACHESIZE);

        if (windowLimit < (start.getAmt() + end.getAmt() + 1)) {
            return false;
        }

        return true;
    }

    /*
     * (non-Javadoc)
     *
     * @see org.apache.hadoop.hive.ql.udf.ptf.TableFunctionEvaluator#canAcceptInputAsStream()
     *
     * WindowTableFunction supports streaming if every function meets at least
     * one of these conditions:
     * 1. The function implements ISupportStreamingModeForWindowing.
     * 2. getWindowingEvaluator returns a non-null evaluator that implements
     *    ISupportStreamingModeForWindowing.
     * 3. The invocation is on a 'fixed' window, i.e. no Unbounded Preceding or
     *    Following.
     */
    @SuppressWarnings("resource")
    private int[] setCanAcceptInputAsStream(Configuration cfg) throws HiveException {

        canAcceptInputAsStream = false;

        if (ptfDesc.getLlInfo().getLeadLagExprs() != null) {
            return null;
        }

        WindowTableFunctionDef tabDef = (WindowTableFunctionDef) getTableDef();
        int startPos = Integer.MAX_VALUE;
        int endPos = Integer.MIN_VALUE;

        for (int i = 0; i < tabDef.getWindowFunctions().size(); i++) {
            WindowFunctionDef wFnDef = tabDef.getWindowFunctions().get(i);
            WindowFrameDef wdwFrame = wFnDef.getWindowFrame();
            GenericUDAFEvaluator fnEval = wFnDef.getWFnEval();
            boolean streamingPossible = streamingPossible(cfg, wFnDef);
            GenericUDAFEvaluator streamingEval = streamingPossible ? fnEval.getWindowingEvaluator(wdwFrame) : null;
            if (streamingEval != null && streamingEval instanceof ISupportStreamingModeForWindowing) {
                continue;
            }
            BoundaryDef start = wdwFrame.getStart();
            BoundaryDef end = wdwFrame.getEnd();
            if (wdwFrame.getWindowType() == WindowType.ROWS) {
                if (!end.isUnbounded() && !start.isUnbounded()) {
                    startPos = Math.min(startPos, wdwFrame.getStart().getRelativeOffset());
                    endPos = Math.max(endPos, wdwFrame.getEnd().getRelativeOffset());
                    continue;
                }
            }
            return null;
        }

        int windowLimit = HiveConf.getIntVar(cfg, ConfVars.HIVEJOINCACHESIZE);

        if (windowLimit < (endPos - startPos + 1)) {
            return null;
        }

        canAcceptInputAsStream = true;
        return new int[] { startPos, endPos };
    }

    private void initializeWindowingFunctionInfoHelpers() throws SemanticException {
        // getWindowFunctionInfo() cannot be called during map/reduce tasks. So cache necessary
        // values during query compilation, and rely on plan serialization to bring this info
        // to the object during the map/reduce tasks.
        if (windowingFunctionHelpers != null) {
            return;
        }

        windowingFunctionHelpers = new HashMap<String, WindowingFunctionInfoHelper>();
        WindowTableFunctionDef tabDef = (WindowTableFunctionDef) getTableDef();
        for (int i = 0; i < tabDef.getWindowFunctions().size(); i++) {
            WindowFunctionDef wFn = tabDef.getWindowFunctions().get(i);
            WindowFunctionInfo wFnInfo = FunctionRegistry.getWindowFunctionInfo(wFn.getName());
            boolean supportsWindow = wFnInfo.isSupportsWindow();
            windowingFunctionHelpers.put(wFn.getName(), new WindowingFunctionInfoHelper(supportsWindow));
        }
    }

    @Override
    protected void setOutputOI(StructObjectInspector outputOI) {
        super.setOutputOI(outputOI);
        // Call here because at this point the WindowTableFunctionDef has been set
        try {
            initializeWindowingFunctionInfoHelpers();
        } catch (SemanticException err) {
            throw new RuntimeException("Unexpected error while setting up windowing function", err);
        }
    }

    private WindowingFunctionInfoHelper getWindowingFunctionInfoHelper(String fnName) {
        WindowingFunctionInfoHelper wFnInfoHelper = windowingFunctionHelpers.get(fnName);
        if (wFnInfoHelper == null) {
            // Should not happen
            throw new RuntimeException("No cached WindowingFunctionInfoHelper for " + fnName);
        }
        return wFnInfoHelper;
    }

    @Override
    public void initializeStreaming(Configuration cfg, StructObjectInspector inputOI, boolean isMapSide)
            throws HiveException {

        int[] span = setCanAcceptInputAsStream(cfg);
        if (!canAcceptInputAsStream) {
            return;
        }

        WindowTableFunctionDef tabDef = (WindowTableFunctionDef) getTableDef();

        for (int i = 0; i < tabDef.getWindowFunctions().size(); i++) {
            WindowFunctionDef wFnDef = tabDef.getWindowFunctions().get(i);
            WindowFrameDef wdwFrame = wFnDef.getWindowFrame();
            GenericUDAFEvaluator fnEval = wFnDef.getWFnEval();
            GenericUDAFEvaluator streamingEval = fnEval.getWindowingEvaluator(wdwFrame);
            if (streamingEval != null) {
                wFnDef.setWFnEval(streamingEval);
                if (wFnDef.isPivotResult()) {
                    ListObjectInspector listOI = (ListObjectInspector) wFnDef.getOI();
                    wFnDef.setOI(listOI.getListElementObjectInspector());
                }
            }
        }

        if (tabDef.getRankLimit() != -1) {
            rnkLimitDef = new RankLimit(tabDef.getRankLimit(), tabDef.getRankLimitFunction(),
                    tabDef.getWindowFunctions());
        }

        streamingState = new StreamingState(cfg, inputOI, isMapSide, tabDef, span[0], span[1]);
    }

    /*
     * (non-Javadoc)
     *
     * @see org.apache.hadoop.hive.ql.udf.ptf.TableFunctionEvaluator#startPartition()
     */
    @Override
    public void startPartition() throws HiveException {
        WindowTableFunctionDef tabDef = (WindowTableFunctionDef) getTableDef();
        streamingState.reset(tabDef);
    }

    /*
     * (non-Javadoc)
     *
     * @see org.apache.hadoop.hive.ql.udf.ptf.TableFunctionEvaluator#processRow(java.lang.Object)
     *
     * - Hand the row to each function, provided there are enough rows for the
     *   function's window.
     * - Call getNextResult on each streaming function.
     * - Output as many rows as possible, based on the minimum size of the
     *   per-function output lists.
     */
    @Override
    public List<Object> processRow(Object row) throws HiveException {

        /*
         * Once enough rows have been output, there is no need to process input rows.
         */
        if (streamingState.rankLimitReached()) {
            return null;
        }

        streamingState.rollingPart.append(row);

        WindowTableFunctionDef tabDef = (WindowTableFunctionDef) tableDef;

        for (int i = 0; i < tabDef.getWindowFunctions().size(); i++) {
            WindowFunctionDef wFn = tabDef.getWindowFunctions().get(i);
            GenericUDAFEvaluator fnEval = wFn.getWFnEval();

            int a = 0;
            if (wFn.getArgs() != null) {
                for (PTFExpressionDef arg : wFn.getArgs()) {
                    streamingState.funcArgs[i][a++] = arg.getExprEvaluator().evaluate(row);
                }
            }

            if (fnEval != null && fnEval instanceof ISupportStreamingModeForWindowing) {
                fnEval.aggregate(streamingState.aggBuffers[i], streamingState.funcArgs[i]);
                Object out = ((ISupportStreamingModeForWindowing) fnEval)
                        .getNextResult(streamingState.aggBuffers[i]);
                if (out != null) {
                    streamingState.fnOutputs[i]
                            .add(out == ISupportStreamingModeForWindowing.NULL_RESULT ? null : out);
                }
            } else {
                int rowToProcess = streamingState.rollingPart.rowToProcess(wFn.getWindowFrame());
                if (rowToProcess >= 0) {
                    Object out = evaluateWindowFunction(wFn, rowToProcess, streamingState.rollingPart);
                    streamingState.fnOutputs[i].add(out);
                }
            }
        }

        List<Object> oRows = new ArrayList<Object>();
        while (true) {
            boolean hasRow = streamingState.hasOutputRow();

            if (!hasRow) {
                break;
            }

            oRows.add(streamingState.nextOutputRow());
        }

        return oRows.size() == 0 ? null : oRows;
    }

    /*
     * (non-Javadoc)
     *
     * @see org.apache.hadoop.hive.ql.udf.ptf.TableFunctionEvaluator#finishPartition()
     *
     * For functions that are not ISupportStreamingModeForWindowing, evaluate
     * the remaining rows (rows whose window spans beyond the end of the
     * partition); for the rest of the functions, invoke terminate.
     *
     * Then, while numOutputRows < numInputRows: for each function that doesn't
     * have enough output, invoke getNextResult; if there is still no output,
     * flag it as an error.
     */
    @Override
    public List<Object> finishPartition() throws HiveException {

        /*
         * Once enough rows have been output, there is no need to generate more output.
         */
        if (streamingState.rankLimitReached()) {
            return null;
        }

        WindowTableFunctionDef tabDef = (WindowTableFunctionDef) getTableDef();
        for (int i = 0; i < tabDef.getWindowFunctions().size(); i++) {
            WindowFunctionDef wFn = tabDef.getWindowFunctions().get(i);
            GenericUDAFEvaluator fnEval = wFn.getWFnEval();

            int numRowsRemaining = wFn.getWindowFrame().getEnd().getRelativeOffset();
            if (fnEval != null && fnEval instanceof ISupportStreamingModeForWindowing) {
                fnEval.terminate(streamingState.aggBuffers[i]);

                WindowingFunctionInfoHelper wFnInfo = getWindowingFunctionInfoHelper(wFn.getName());
                if (!wFnInfo.isSupportsWindow()) {
                    numRowsRemaining = ((ISupportStreamingModeForWindowing) fnEval)
                            .getRowsRemainingAfterTerminate();
                }

                if (numRowsRemaining != BoundarySpec.UNBOUNDED_AMOUNT) {
                    while (numRowsRemaining > 0) {
                        Object out = ((ISupportStreamingModeForWindowing) fnEval)
                                .getNextResult(streamingState.aggBuffers[i]);
                        if (out != null) {
                            streamingState.fnOutputs[i]
                                    .add(out == ISupportStreamingModeForWindowing.NULL_RESULT ? null : out);
                        }
                        numRowsRemaining--;
                    }
                }
            } else {
                while (numRowsRemaining > 0) {
                    int rowToProcess = streamingState.rollingPart.size() - numRowsRemaining;
                    if (rowToProcess >= 0) {
                        Object out = evaluateWindowFunction(wFn, rowToProcess, streamingState.rollingPart);
                        streamingState.fnOutputs[i].add(out);
                    }
                    numRowsRemaining--;
                }
            }
        }

        List<Object> oRows = new ArrayList<Object>();

        while (!streamingState.rollingPart.processedAllRows() && !streamingState.rankLimitReached()) {
            boolean hasRow = streamingState.hasOutputRow();

            if (!hasRow && !streamingState.rankLimitReached()) {
                throw new HiveException("Internal Error: cannot generate all output rows for a Partition");
            }
            if (hasRow) {
                oRows.add(streamingState.nextOutputRow());
            }
        }

        return oRows.size() == 0 ? null : oRows;
    }

    @Override
    public boolean canIterateOutput() {
        return true;
    }

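    /**
     * Lazy output path: functions whose frame covers the whole partition are
     * evaluated up front (pivot-result functions into one list of per-row
     * values); functions with a real window, and pivot functions that can
     * stream, are evaluated row by row by the returned WindowingIterator.
     */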
    @SuppressWarnings("rawtypes")
    @Override
    public Iterator<Object> iterator(PTFPartitionIterator<Object> pItr) throws HiveException {
        WindowTableFunctionDef wTFnDef = (WindowTableFunctionDef) getTableDef();
        ArrayList<Object> output = new ArrayList<Object>();
        List<?>[] outputFromPivotFunctions = new List<?>[wTFnDef.getWindowFunctions().size()];
        ArrayList<Integer> wFnsWithWindows = new ArrayList<Integer>();
        PTFPartition iPart = pItr.getPartition();

        int i = 0;
        for (WindowFunctionDef wFn : wTFnDef.getWindowFunctions()) {
            boolean processWindow = processWindow(wFn.getWindowFrame());
            pItr.reset();
            if (!processWindow && !wFn.isPivotResult()) {
                Object out = evaluateFunctionOnPartition(wFn, iPart);
                output.add(out);
            } else if (wFn.isPivotResult()) {
                GenericUDAFEvaluator streamingEval = wFn.getWFnEval().getWindowingEvaluator(wFn.getWindowFrame());
                if (streamingEval != null && streamingEval instanceof ISupportStreamingModeForWindowing) {
                    ISupportStreamingModeForWindowing strEval = (ISupportStreamingModeForWindowing) streamingEval;
                    if (strEval.getRowsRemainingAfterTerminate() == 0) {
                        wFn.setWFnEval(streamingEval);
                        if (wFn.getOI() instanceof ListObjectInspector) {
                            ListObjectInspector listOI = (ListObjectInspector) wFn.getOI();
                            wFn.setOI(listOI.getListElementObjectInspector());
                        }
                        output.add(null);
                        wFnsWithWindows.add(i);
                    } else {
                        outputFromPivotFunctions[i] = (List) evaluateFunctionOnPartition(wFn, iPart);
                        output.add(null);
                    }
                } else {
                    outputFromPivotFunctions[i] = (List) evaluateFunctionOnPartition(wFn, iPart);
                    output.add(null);
                }
            } else {
                output.add(null);
                wFnsWithWindows.add(i);
            }
            i++;
        }

        for (i = 0; i < iPart.getOutputOI().getAllStructFieldRefs().size(); i++) {
            output.add(null);
        }

        if (wTFnDef.getRankLimit() != -1) {
            rnkLimitDef = new RankLimit(wTFnDef.getRankLimit(), wTFnDef.getRankLimitFunction(),
                    wTFnDef.getWindowFunctions());
        }

        return new WindowingIterator(iPart, output, outputFromPivotFunctions,
                ArrayUtils.toPrimitive(wFnsWithWindows.toArray(new Integer[wFnsWithWindows.size()])));
    }

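    /**
     * Resolver invoked at query compilation time to create the
     * WindowingTableFunction evaluator and set up its output ObjectInspector.
     */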
    public static class WindowingTableFunctionResolver extends TableFunctionResolver {
        /*
         * The ObjectInspector of the row constructed from the output of the
         * window functions, before it is put in the windowing processing
         * partition. Set by the Translator/Deserializer.
         */
        private transient StructObjectInspector wdwProcessingOutputOI;

        public StructObjectInspector getWdwProcessingOutputOI() {
            return wdwProcessingOutputOI;
        }

        public void setWdwProcessingOutputOI(StructObjectInspector wdwProcessingOutputOI) {
            this.wdwProcessingOutputOI = wdwProcessingOutputOI;
        }

        @Override
        protected TableFunctionEvaluator createEvaluator(PTFDesc ptfDesc, PartitionedTableFunctionDef tDef) {

            return new WindowingTableFunction();
        }

        @Override
        public void setupOutputOI() throws SemanticException {
            setOutputOI(wdwProcessingOutputOI);
        }

        /*
         * Set up the OI based on:
         * - the input TableDef's columns
         * - the window functions
         */
        @Override
        public void initializeOutputOI() throws HiveException {
            setupOutputOI();
        }

        @Override
        public boolean transformsRawInput() {
            return false;
        }

        /*
         * (non-Javadoc)
         * @see org.apache.hadoop.hive.ql.udf.ptf.TableFunctionResolver#carryForwardNames()
         * Setting to true is correct only for special internal Functions.
         */
        @Override
        public boolean carryForwardNames() {
            return true;
        }

        /*
         * (non-Javadoc)
         * @see org.apache.hadoop.hive.ql.udf.ptf.TableFunctionResolver#getOutputNames()
         * Set to null only because carryForwardNames is true.
         */
        @Override
        public ArrayList<String> getOutputColumnNames() {
            return null;
        }

    }

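    /**
     * A constant list view: every index returns the same value. Used to expand
     * a single per-partition aggregate into one value per output row without
     * materializing n copies.
     */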
    public static class SameList<E> extends AbstractList<E> {
        int sz;
        E val;

        public SameList(int sz, E val) {
            this.sz = sz;
            this.val = val;
        }

        @Override
        public E get(int index) {
            return val;
        }

        @Override
        public int size() {
            return sz;
        }

    }

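    /**
     * Iterates over the output rows of a partition, reusing a single output
     * row object: pivot-function results are filled in from precomputed lists,
     * streaming functions are advanced one input row at a time, and the
     * remaining functions are evaluated against the partition for the current
     * row index.
     */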
    public class WindowingIterator implements Iterator<Object> {

        ArrayList<Object> output;
        List<?>[] outputFromPivotFunctions;
        int currIdx;
        PTFPartition iPart;
        /*
         * These are the functions that have a window. Functions without a
         * window have already been processed.
         */
        int[] wFnsToProcess;
        WindowTableFunctionDef wTFnDef;
        PTFDesc ptfDesc;
        StructObjectInspector inputOI;
        AggregationBuffer[] aggBuffers;
        Object[][] args;
        RankLimit rnkLimit;

        WindowingIterator(PTFPartition iPart, ArrayList<Object> output, List<?>[] outputFromPivotFunctions,
                int[] wFnsToProcess) {
            this.iPart = iPart;
            this.output = output;
            this.outputFromPivotFunctions = outputFromPivotFunctions;
            this.wFnsToProcess = wFnsToProcess;
            this.currIdx = 0;
            wTFnDef = (WindowTableFunctionDef) getTableDef();
            ptfDesc = getQueryDef();
            inputOI = iPart.getOutputOI();

            aggBuffers = new AggregationBuffer[wTFnDef.getWindowFunctions().size()];
            args = new Object[wTFnDef.getWindowFunctions().size()][];
            try {
                for (int j : wFnsToProcess) {
                    WindowFunctionDef wFn = wTFnDef.getWindowFunctions().get(j);
                    aggBuffers[j] = wFn.getWFnEval().getNewAggregationBuffer();
                    args[j] = new Object[wFn.getArgs() == null ? 0 : wFn.getArgs().size()];
                }
            } catch (HiveException he) {
                throw new RuntimeException(he);
            }
            if (WindowingTableFunction.this.rnkLimitDef != null) {
                rnkLimit = new RankLimit(WindowingTableFunction.this.rnkLimitDef);
            }
        }

        @Override
        public boolean hasNext() {

            if (rnkLimit != null && rnkLimit.limitReached()) {
                return false;
            }
            return currIdx < iPart.size();
        }

        // Given the data in a partition, evaluate the result for the next row,
        // using streaming mode where the function supports it and batch-style
        // evaluation otherwise.
        @Override
        public Object next() {
            int i;
            for (i = 0; i < outputFromPivotFunctions.length; i++) {
                if (outputFromPivotFunctions[i] != null) {
                    output.set(i, outputFromPivotFunctions[i].get(currIdx));
                }
            }

            try {
                for (int j : wFnsToProcess) {
                    WindowFunctionDef wFn = wTFnDef.getWindowFunctions().get(j);
                    if (wFn.getWFnEval() instanceof ISupportStreamingModeForWindowing) {
                        Object iRow = iPart.getAt(currIdx);
                        int a = 0;
                        if (wFn.getArgs() != null) {
                            for (PTFExpressionDef arg : wFn.getArgs()) {
                                args[j][a++] = arg.getExprEvaluator().evaluate(iRow);
                            }
                        }
                        wFn.getWFnEval().aggregate(aggBuffers[j], args[j]);
                        Object out = ((ISupportStreamingModeForWindowing) wFn.getWFnEval())
                                .getNextResult(aggBuffers[j]);
                        if (out != null) {
                            if (out == ISupportStreamingModeForWindowing.NULL_RESULT) {
                                out = null;
                            } else {
                                out = ObjectInspectorUtils.copyToStandardObject(out, wFn.getOI());
                            }
                        }
                        output.set(j, out);
                    } else {
                        Object out = evaluateWindowFunction(wFn, currIdx, iPart);
                        output.set(j, out);
                    }
                }

                Object iRow = iPart.getAt(currIdx);
                i = wTFnDef.getWindowFunctions().size();
                for (StructField f : inputOI.getAllStructFieldRefs()) {
                    output.set(i++, inputOI.getStructFieldData(iRow, f));
                }

            } catch (HiveException he) {
                throw new RuntimeException(he);
            }

            if (rnkLimit != null) {
                rnkLimit.updateRank(output);
            }
            currIdx++;
            return output;
        }

        @Override
        public void remove() {
            throw new UnsupportedOperationException();
        }

    }

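    /**
     * Per-partition state for streaming mode: a rolling window over the input
     * rows, plus one pending-output list, one aggregation buffer, and one
     * argument array per window function, and an optional rank limit.
     */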
    class StreamingState {
        PTFRollingPartition rollingPart;
        List<Object>[] fnOutputs;
        AggregationBuffer[] aggBuffers;
        Object[][] funcArgs;
        RankLimit rnkLimit;

        @SuppressWarnings("unchecked")
        StreamingState(Configuration cfg, StructObjectInspector inputOI, boolean isMapSide,
                WindowTableFunctionDef tabDef, int precedingSpan, int followingSpan) throws HiveException {
            AbstractSerDe serde = isMapSide ? tabDef.getInput().getOutputShape().getSerde()
                    : tabDef.getRawInputShape().getSerde();
            StructObjectInspector outputOI = isMapSide ? tabDef.getInput().getOutputShape().getOI()
                    : tabDef.getRawInputShape().getOI();
            rollingPart = PTFPartition.createRolling(cfg, serde, inputOI, outputOI, precedingSpan, followingSpan);

            int numFns = tabDef.getWindowFunctions().size();
            fnOutputs = new ArrayList[numFns];

            aggBuffers = new AggregationBuffer[numFns];
            funcArgs = new Object[numFns][];
            for (int i = 0; i < numFns; i++) {
                fnOutputs[i] = new ArrayList<Object>();
                WindowFunctionDef wFn = tabDef.getWindowFunctions().get(i);
                funcArgs[i] = new Object[wFn.getArgs() == null ? 0 : wFn.getArgs().size()];
                aggBuffers[i] = wFn.getWFnEval().getNewAggregationBuffer();
            }
            if (WindowingTableFunction.this.rnkLimitDef != null) {
                rnkLimit = new RankLimit(WindowingTableFunction.this.rnkLimitDef);
            }
        }

        void reset(WindowTableFunctionDef tabDef) throws HiveException {
            int numFns = tabDef.getWindowFunctions().size();
            rollingPart.reset();
            for (int i = 0; i < fnOutputs.length; i++) {
                fnOutputs[i].clear();
            }

            for (int i = 0; i < numFns; i++) {
                WindowFunctionDef wFn = tabDef.getWindowFunctions().get(i);
                aggBuffers[i] = wFn.getWFnEval().getNewAggregationBuffer();
            }

            if (rnkLimit != null) {
                rnkLimit.reset();
            }
        }

        boolean hasOutputRow() {
            if (rankLimitReached()) {
                return false;
            }

            for (int i = 0; i < fnOutputs.length; i++) {
                if (fnOutputs[i].size() == 0) {
                    return false;
                }
            }
            return true;
        }

        private List<Object> nextOutputRow() throws HiveException {
            List<Object> oRow = new ArrayList<Object>();
            Object iRow = rollingPart.nextOutputRow();
            int i = 0;
            for (; i < fnOutputs.length; i++) {
                oRow.add(fnOutputs[i].remove(0));
            }
            for (StructField f : rollingPart.getOutputOI().getAllStructFieldRefs()) {
                oRow.add(rollingPart.getOutputOI().getStructFieldData(iRow, f));
            }
            if (rnkLimit != null) {
                rnkLimit.updateRank(oRow);
            }
            return oRow;
        }

        boolean rankLimitReached() {
            return rnkLimit != null && rnkLimit.limitReached();
        }
    }

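    /**
     * Tracks the rank value of the rows output so far; once currentRank
     * reaches rankLimit, no further rows need to be produced. Used when a
     * limit on a rank function has been pushed down into the windowing
     * function.
     */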
    static class RankLimit {

        /*
         * Rows with a rank <= rankLimit are output.
         * Only the first row with rank = rankLimit is output.
         */
        final int rankLimit;

        /*
         * the rankValue of the last row output.
         */
        int currentRank;

        /*
         * index of Rank function.
         */
        final int rankFnIdx;

        final PrimitiveObjectInspector fnOutOI;

        RankLimit(int rankLimit, int rankFnIdx, List<WindowFunctionDef> wdwFnDefs) {
            this.rankLimit = rankLimit;
            this.rankFnIdx = rankFnIdx;
            this.fnOutOI = (PrimitiveObjectInspector) wdwFnDefs.get(rankFnIdx).getOI();
            this.currentRank = -1;
        }

        RankLimit(RankLimit rl) {
            this.rankLimit = rl.rankLimit;
            this.rankFnIdx = rl.rankFnIdx;
            this.fnOutOI = rl.fnOutOI;
            this.currentRank = -1;
        }

        void reset() {
            this.currentRank = -1;
        }

        void updateRank(List<Object> oRow) {
            int r = (Integer) fnOutOI.getPrimitiveJavaObject(oRow.get(rankFnIdx));
            if (r > currentRank) {
                currentRank = r;
            }
        }

        boolean limitReached() {
            return currentRank >= rankLimit;
        }
    }

}
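
Example

The SameList helper above is the key trick in the batch path: a function whose frame spans the whole partition is evaluated once, and the single result is then presented as a read-only list with one entry per row. Below is a minimal standalone sketch (not part of Hive; the SameListDemo class and its values are made up for illustration) showing the idea in isolation.

import java.util.AbstractList;
import java.util.List;

public class SameListDemo {

    // Same shape as WindowingTableFunction.SameList: a constant,
    // O(1)-memory list view over a single value.
    static class SameList<E> extends AbstractList<E> {
        final int sz;
        final E val;

        SameList(int sz, E val) {
            this.sz = sz;
            this.val = val;
        }

        @Override
        public E get(int index) {
            return val;
        }

        @Override
        public int size() {
            return sz;
        }
    }

    public static void main(String[] args) {
        // e.g. a sum computed once for a 4-row partition...
        List<Long> column = new SameList<>(4, 42L);

        // ...then read back as if it had been computed for every row.
        System.out.println(column.get(0)); // 42
        System.out.println(column);        // [42, 42, 42, 42]
    }
}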