cascading.flow.stack.FlowMapperStack.java Source code


Introduction

Here is the source code for cascading.flow.stack.FlowMapperStack.java. This class builds the chain of mapper stack elements for a Cascading flow step and pushes each key/value pair read in a Hadoop map task through that chain.

Source

/*
 * Copyright (c) 2007-2010 Concurrent, Inc. All Rights Reserved.
 *
 * Project and contact information: http://www.cascading.org/
 *
 * This file is part of the Cascading project.
 *
 * Cascading is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Cascading is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Cascading.  If not, see <http://www.gnu.org/licenses/>.
 */

package cascading.flow.stack;

import java.io.IOException;
import java.util.Set;

import cascading.flow.FlowElement;
import cascading.flow.FlowStep;
import cascading.flow.Scope;
import cascading.flow.hadoop.HadoopFlowProcess;
import cascading.operation.Function;
import cascading.pipe.Each;
import cascading.pipe.Group;
import cascading.pipe.Pipe;
import cascading.tap.Tap;
import cascading.tap.TempHfs;
import cascading.tuple.Tuple;
import cascading.util.Util;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.log4j.Logger;

/**
 * FlowMapperStack builds one stack of {@link MapperStackElement} instances for each
 * scope entering from the current source {@link Tap} and pushes every key/value pair
 * read by the Hadoop map task through those stacks.
 */
public class FlowMapperStack {
    /** Field LOG */
    private static final Logger LOG = Logger.getLogger(FlowMapperStack.class);

    /** Field step */
    private final FlowStep step;
    /** Field currentSource */
    private final Tap currentSource;
    /** Field flowProcess */
    private final HadoopFlowProcess flowProcess;

    /** Field stacks */
    private Stack[] stacks;
    /** Field sourceElement */
    private SourceMapperStackElement sourceElement;

    /** Class Stack is a simple holder for stack head and tails */
    private class Stack {
        /** Field head */
        MapperStackElement head;
        /** Field tail */
        MapperStackElement tail;
    }

    public FlowMapperStack(HadoopFlowProcess flowProcess) throws IOException {
        this.flowProcess = flowProcess;

        JobConf jobConf = flowProcess.getJobConf();
        step = (FlowStep) Util.deserializeBase64(jobConf.getRaw("cascading.flow.step"));

        // is set by the MultiInputSplit
        currentSource = (Tap) Util.deserializeBase64(jobConf.getRaw("cascading.step.source"));

        if (LOG.isDebugEnabled())
            LOG.debug("map current source: " + currentSource);

        buildStack();

        for (Stack stack : stacks)
            stack.tail.open();
    }

    private void buildStack() throws IOException {
        Set<Scope> incomingScopes = step.getNextScopes(currentSource);

        sourceElement = makeSourceElement(incomingScopes);

        stacks = new Stack[incomingScopes.size()];
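        // one stack of elements is built below for each scope (branch) leaving the current source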

        int i = 0;
        boolean allFilters = true;

        for (Scope incomingScope : incomingScopes) {
            FlowElement operator = step.getNextFlowElement(incomingScope);

            stacks[i] = new Stack();

            stacks[i].tail = null;

            String trapName = null;
            Tap trap = null;
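            // chain every consecutive Each pipe into this branch's stack until the
            // next element is a Group or a sink Tap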

            while (operator instanceof Each) {
                trapName = ((Pipe) operator).getName();
                trap = step.getMapperTrap(trapName);
                stacks[i].tail = new EachMapperStackElement(stacks[i].tail, flowProcess, incomingScope, trap,
                        (Each) operator);

                if (((Each) operator).getOperation() instanceof Function)
                    allFilters = false;

                incomingScope = step.getNextScope(operator);
                operator = step.getNextFlowElement(incomingScope);
            }

            boolean useTapCollector = false;
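            // the branch must terminate in either a Group (the map side of a grouping) or a sink Tap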

            if (operator instanceof Group) {
                Scope outgoingScope = step.getNextScope(operator); // is always Group
                boolean copyTuple = allFilters && i != stacks.length - 1;

                if (!copyTuple) // only copy if the out values are the function results
                    copyTuple = incomingScope.getOutValuesSelector() == null
                            || incomingScope.getOutValuesSelector().equals(incomingScope.getDeclaredFields());

                trapName = ((Pipe) operator).getName();
                trap = step.getMapperTrap(trapName);
                stacks[i].tail = new GroupMapperStackElement(stacks[i].tail, flowProcess, incomingScope, trap,
                        (Group) operator, outgoingScope, copyTuple);
            } else if (operator instanceof Tap) {
                useTapCollector = useTapCollector || ((Tap) operator).isWriteDirect();

                stacks[i].tail = new SinkMapperStackElement(stacks[i].tail, flowProcess, incomingScope, trapName,
                        trap, (Tap) operator, useTapCollector);
            } else
                throw new IllegalStateException(
                        "operator should be group or tap, is instead: " + operator.getClass().getName());

            stacks[i].head = (MapperStackElement) stacks[i].tail.resolveStack();

            i++;
        }
    }

    private SourceMapperStackElement makeSourceElement(Set<Scope> incomingScopes) throws IOException {
        Scope scope = incomingScopes.iterator().next();
        FlowElement operator = step.getNextFlowElement(scope);

        // no need to bother with traps for intermediate sources
        // should prevent confusing info message below
        if (currentSource instanceof TempHfs || !(operator instanceof Pipe))
            return new SourceMapperStackElement(flowProcess, scope, currentSource);

        String trapName = ((Pipe) operator).getName();
        Tap trap = step.getMapperTrap(trapName);

        if (trap != null && incomingScopes.size() != 1)
            LOG.info("more than one possible trap for source tap, using trap named: " + trapName);

        return new SourceMapperStackElement(flowProcess, scope, trapName, trap, currentSource);
    }

    public void map(Object key, Object value, OutputCollector output) throws IOException {
        Tuple tuple = null;

        try {
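            // sourceElement turns the raw key/value into a Tuple; a StackException only
            // wraps the real failure, so the cause is unwrapped and rethrown below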
            tuple = sourceElement.source(key, value);
        } catch (StackException exception) {
            if (exception.getCause() instanceof Error)
                throw (Error) exception.getCause();

            if (exception.getCause() instanceof IOException)
                throw (IOException) exception.getCause();

            throw (RuntimeException) exception.getCause();
        }

        if (LOG.isDebugEnabled()) {
            if (tuple == null)
                LOG.debug("map skipping key and value");

            if (LOG.isTraceEnabled()) {
                if (key instanceof Tuple)
                    LOG.trace("map key: " + ((Tuple) key).print());
                else
                    LOG.trace("map key: [" + key + "]");

                if (tuple != null)
                    LOG.trace("map value: " + tuple.print());
            }
        }

        // skip the key/value pair if null is returned from the source
        if (tuple == null)
            return;

        for (int i = 0; i < stacks.length; i++) {
            stacks[i].tail.setLastOutput(output);

            try {
                stacks[i].head.collect(tuple);
            } catch (StackException exception) {
                if (exception.getCause() instanceof Error)
                    throw (Error) exception.getCause();

                if (exception.getCause() instanceof IOException)
                    throw (IOException) exception.getCause();

                throw (RuntimeException) exception.getCause();
            }
        }
    }

    public void close() throws IOException {
        for (int i = 0; i < stacks.length; i++)
            stacks[i].head.close();
    }
}
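
Usage

The sketch below shows how a Hadoop map task might drive FlowMapperStack, based only on the public constructor, map(), and close() signatures above. The MyFlowMapper class and its createFlowProcess() helper are illustrative assumptions, not the actual cascading.flow.FlowMapper shipped with Cascading, and the construction of the HadoopFlowProcess is left as a placeholder because its constructor arguments differ between Cascading versions.

import java.io.IOException;

import cascading.flow.hadoop.HadoopFlowProcess;
import cascading.flow.stack.FlowMapperStack;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;

/** A minimal sketch of a mapper that delegates to FlowMapperStack; not the real cascading.flow.FlowMapper. */
public class MyFlowMapper extends MapReduceBase implements Mapper<Object, Object, Object, Object> {
    private FlowMapperStack flowMapperStack;

    @Override
    public void configure(JobConf jobConf) {
        try {
            // createFlowProcess() is a hypothetical stand-in for however the real
            // FlowMapper builds its HadoopFlowProcess from the task's JobConf
            flowMapperStack = new FlowMapperStack(createFlowProcess(jobConf));
        } catch (IOException exception) {
            throw new RuntimeException("unable to build mapper stack", exception);
        }
    }

    private HadoopFlowProcess createFlowProcess(JobConf jobConf) {
        // placeholder: the HadoopFlowProcess constructor varies between Cascading
        // versions, so its construction is intentionally not shown here
        throw new UnsupportedOperationException("construct a HadoopFlowProcess for this task");
    }

    @Override
    public void map(Object key, Object value, OutputCollector<Object, Object> output, Reporter reporter)
            throws IOException {
        // each record is sourced into a Tuple and pushed through every branch's stack
        flowMapperStack.map(key, value, output);
    }

    @Override
    public void close() throws IOException {
        flowMapperStack.close();
    }
}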