eu.stratosphere.core.testing.GenericTestPlan.java Source code

Java tutorial

Introduction

Here is the source code for eu.stratosphere.core.testing.GenericTestPlan.java

Source

/***********************************************************************************************************************
 *
 * Copyright (C) 2010-2013 by the Stratosphere project (http://stratosphere.eu)
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
 * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations under the License.
 *
 **********************************************************************************************************************/

package eu.stratosphere.core.testing;

import java.io.Closeable;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.IdentityHashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.junit.Assert;

import com.esotericsoftware.kryo.util.UnsafeUtil;

import eu.stratosphere.api.common.Plan;
import eu.stratosphere.api.common.io.FileInputFormat;
import eu.stratosphere.api.common.io.FileOutputFormat;
import eu.stratosphere.api.common.io.GenericInputFormat;
import eu.stratosphere.api.common.operators.Operator;
import eu.stratosphere.api.common.operators.base.FileDataSinkBase;
import eu.stratosphere.api.common.operators.base.FileDataSourceBase;
import eu.stratosphere.api.common.operators.base.GenericDataSinkBase;
import eu.stratosphere.api.common.operators.base.GenericDataSourceBase;
import eu.stratosphere.client.LocalExecutor;
import eu.stratosphere.configuration.ConfigConstants;
import eu.stratosphere.configuration.Configuration;
import eu.stratosphere.configuration.GlobalConfiguration;
import eu.stratosphere.core.fs.FileStatus;
import eu.stratosphere.core.fs.FileSystem;
import eu.stratosphere.core.fs.Path;
import eu.stratosphere.core.testing.io.SequentialInputFormat;
import eu.stratosphere.core.testing.io.SequentialOutputFormat;
import eu.stratosphere.nephele.services.memorymanager.UnsafeMemorySegment;
import eu.stratosphere.nephele.taskmanager.TaskManager;
import eu.stratosphere.util.StringUtils;
import eu.stratosphere.util.Visitor;

/**
 * Base class for type-specific test plans. See {@link TestPlan} for the core {@link Record} implementation.
 * 
 * @param <T>
 *        the record type
 * @param <Records>
 *        record type-specific GenericTestRecords
 */

public abstract class GenericTestPlan<T, Records extends GenericTestRecords<T>> implements Closeable {

    // Records read back from each sink; identity-keyed, filled lazily by getActualOutput(...) and by
    // buildPlanWithReadableSinks().
    private final Map<GenericDataSinkBase<T>, Records> actualOutputs = new IdentityHashMap<GenericDataSinkBase<T>, Records>();

    // The operators handed to the constructor after they went through InputOutputAdder#process.
    private final Operator<?>[] contracts;

    private static final Log LOG = LogFactory.getLog(GenericTestPlan.class);

    // Degree applied in syncDegreeOfParallelism() to operators that report -1; sources always get 1.
    private int degreeOfParallelism = 1;

    // Records the test expects per sink; identity-keyed, filled lazily by getExpectedOutput(...).
    private final Map<GenericDataSinkBase<T>, Records> expectedOutputs = new IdentityHashMap<GenericDataSinkBase<T>, Records>();

    // Records fed into each source; identity-keyed, filled lazily by getInput(...).
    private final Map<GenericDataSourceBase<?, ?>, Records> inputs = new IdentityHashMap<GenericDataSourceBase<?, ?>, Records>();

    // All sinks reachable from the contracts, in discovery order (see findSinksAndSources()).
    private final List<GenericDataSinkBase<T>> sinks = new ArrayList<GenericDataSinkBase<T>>();

    // All sources reachable from the contracts, in discovery order (see findSinksAndSources()).
    private final List<GenericDataSourceBase<?, ?>> sources = new ArrayList<GenericDataSourceBase<?, ?>>();

    // Fallback type configuration used by run() for inputs and outputs without an explicit one.
    private TypeConfig<T> defaultConfig;

    /**
     * Initializes the test plan with the given collection of {@link Operator}s by delegating to
     * {@link #GenericTestPlan(TypeConfig, Operator...)}.
     * 
     * @param defaultConfig
     *        the {@link TypeConfig} that is used for the plan if no specific config is given
     * @param contracts
     *        a collection of Operators with at least one element; it is copied into an array before delegation
     * @throws IllegalArgumentException
     *         if the collection is empty
     */
    public GenericTestPlan(final TypeConfig<T> defaultConfig, final Collection<? extends Operator<?>> contracts) {
        this(defaultConfig, contracts.toArray(new Operator<?>[contracts.size()]));
    }

    /**
     * Initializes the test plan with the given {@link Operator}s. The operators are first passed through
     * {@link InputOutputAdder}, then all sinks and sources reachable from the result are collected and their file
     * formats are configured with an open timeout of 0.<br>
     * As a side effect, the default instance type is registered in the {@link GlobalConfiguration}.
     * 
     * @param defaultConfig
     *        the {@link TypeConfig} that is used for the plan if no specific config is given
     * @param contracts
     *        the Operators to test; at least one element is required
     * @throws IllegalArgumentException
     *         if no operator is given
     */
    public GenericTestPlan(final TypeConfig<T> defaultConfig, final Operator<?>... contracts) {
        if (contracts.length == 0)
            throw new IllegalArgumentException("a test plan requires at least one operator");

        this.defaultConfig = defaultConfig;

        // the instance type must be part of the global configuration before the plan is assembled and executed
        final Configuration config = new Configuration();
        config.setString(ConfigConstants.DEFAULT_INSTANCE_TYPE, "standard,1,1,200,1,1");
        GlobalConfiguration.includeConfiguration(config);

        this.contracts = new InputOutputAdder<T>(defaultConfig).process(contracts);

        // order matters: the sinks and sources have to be known before they can be configured
        this.findSinksAndSources();
        this.configureSinksAndSources();
    }

    /**
     * Closes all {@link GenericTestRecords} known to this plan: first the inputs, then the actual outputs and
     * finally the expected outputs. The records are collected in a {@link ClosableManager}, which performs the
     * actual closing.
     * 
     * @throws IOException
     *         if the {@link ClosableManager} fails to close the records
     */
    @Override
    public void close() throws IOException {
        final ClosableManager manager = new ClosableManager();

        for (final Records inputRecords : this.inputs.values()) {
            manager.add(inputRecords);
        }
        for (final Records actualRecords : this.actualOutputs.values()) {
            manager.add(actualRecords);
        }
        for (final Records expectedRecords : this.expectedOutputs.values()) {
            manager.add(expectedRecords);
        }

        manager.close();
    }

    /**
     * Returns the actual output {@link GenericTestRecords} of the first sink of the TestPlan.<br>
     * The values are only meaningful after a {@link #run()}.
     * 
     * @return the output {@link GenericTestRecords} of the TestPlan associated with the
     *         first sink
     */
    public Records getActualOutput() {
        final int firstSink = 0;
        return this.getActualOutput(firstSink);
    }

    /**
     * Returns the actual output {@link GenericTestRecords} that belong to the given sink without changing their
     * {@link TypeConfig}. Addressing the sink directly is the recommended way to get output records in more
     * complex TestPlans.<br>
     * The values are only meaningful after a {@link #run()}.
     * 
     * @param sink
     *        the sink of which the associated output GenericTestRecords should be
     *        returned
     * @return the output {@link GenericTestRecords} of the TestPlan associated with the
     *         given sink
     */
    public Records getActualOutput(final GenericDataSinkBase<T> sink) {
        final TypeConfig<T> unchangedConfig = null;
        return this.getActualOutput(sink, unchangedConfig);
    }

    /**
     * Returns the actual output {@link GenericTestRecords} that belong to the given sink. The records are created
     * with the given {@link TypeConfig} on first access; on later accesses a non-null config replaces the config
     * of the existing records.<br>
     * The values are only meaningful after a {@link #run()}.
     * 
     * @param sink
     *        the sink of which the associated output GenericTestRecords should be
     *        returned
     * @param typeConfig
     *        the {@link TypeConfig} that is used for this output; may be <code>null</code> to leave existing
     *        records untouched
     * @return the output {@link GenericTestRecords} of the TestPlan associated with the
     *         given sink
     */
    public Records getActualOutput(final GenericDataSinkBase<T> sink, final TypeConfig<T> typeConfig) {
        final Records existing = this.actualOutputs.get(sink);
        if (existing == null) {
            // first access: create and remember the records for this sink
            final Records created = this.createTestRecords(typeConfig);
            this.actualOutputs.put(sink, created);
            return created;
        }

        if (typeConfig != null) {
            existing.setTypeConfig(typeConfig);
        }
        return existing;
    }

    /**
     * Returns the actual output {@link GenericTestRecords} of the <i>i</i>th sink of the TestPlan. If multiple
     * contracts are tested in the TestPlan, {@link #getActualOutput(GenericDataSinkBase)} identifies the output
     * unambiguously and should be preferred.<br>
     * The values are only meaningful after a {@link #run()}.
     * 
     * @param number
     *        the number of the output.
     * @return the <i>i</i>th output of the TestPlan
     */
    public Records getActualOutput(final int number) {
        final GenericDataSinkBase<T> sink = this.getDataSinks().get(number);
        return this.getActualOutput(sink);
    }

    /**
     * Returns the actual output {@link GenericTestRecords} of the <i>i</i>th sink of the TestPlan and applies the
     * given {@link TypeConfig} to them.<br>
     * The values are only meaningful after a {@link #run()}.
     * 
     * @param sinkNumber
     *        the <i>i</i>th sink of which the associated output GenericTestRecords should be
     *        returned
     * @param typeConfig
     *        the {@link TypeConfig} that is used for this output
     * @return the <i>i</i>th output of the TestPlan
     */
    public Records getActualOutput(final int sinkNumber, final TypeConfig<T> typeConfig) {
        final GenericDataSinkBase<T> sink = this.sinks.get(sinkNumber);
        return this.getActualOutput(sink, typeConfig);
    }

    /**
     * Returns the actual output {@link GenericTestRecords} of the first sink of the TestPlan and applies the given
     * {@link TypeConfig} to them.<br>
     * The values are only meaningful after a {@link #run()}.
     * 
     * @param typeConfig
     *        the {@link TypeConfig} that is used for this output
     * @return the output {@link GenericTestRecords} of the TestPlan associated with the
     *         first sink
     */
    public Records getActualOutput(final TypeConfig<T> typeConfig) {
        final int firstSink = 0;
        return this.getActualOutput(firstSink, typeConfig);
    }

    /**
     * Returns the degree of parallelism that {@link #run()} assigns to operators without an explicit degree.
     * 
     * @return the degreeOfParallelism
     */
    public int getDegreeOfParallelism() {
        final int currentDegree = this.degreeOfParallelism;
        return currentDegree;
    }

    /**
     * Returns the expected output {@link GenericTestRecords} of the TestPlan associated with the given sink. This
     * is the recommended method to set expected output records for more complex TestPlans.<br>
     * On first access the records are created with the given {@link TypeConfig}, and a non-null config is also
     * applied to the actual output of the same sink so that both sides are read with the same configuration. On
     * later accesses a non-null config replaces the config of the existing expected records.
     * 
     * @param sink
     *        the sink of which the associated expected output GenericTestRecords
     *        should be returned
     * @param typeConfig
     *        the TypeConfig that should be used to create a new GenericTestRecords if needed; may be
     *        <code>null</code> to keep the current configuration
     * @return the expected output {@link GenericTestRecords} of the TestPlan associated
     *         with the given sink
     */
    public Records getExpectedOutput(final GenericDataSinkBase<T> sink, final TypeConfig<T> typeConfig) {
        Records values = this.expectedOutputs.get(sink);
        if (values == null) {
            this.expectedOutputs.put(sink, values = this.createTestRecords(typeConfig));
            final Records actualOutput = this.getActualOutput(sink);
            // like all other accessors, never overwrite an existing configuration with null
            if (typeConfig != null)
                actualOutput.setTypeConfig(typeConfig);
        } else if (typeConfig != null)
            values.setTypeConfig(typeConfig);
        return values;
    }

    /**
     * Returns the expected output {@link GenericTestRecords} of the <i>i</i>th sink of the TestPlan. If multiple
     * contracts are tested in the TestPlan, {@link #getExpectedOutput(GenericDataSinkBase, TypeConfig)} identifies
     * the output unambiguously and should be preferred.
     * 
     * @param number
     *        the number of the expected output.
     * @param typeConfig
     *        the TypeConfig that should be used to create a new GenericTestRecords if needed
     * @return the <i>i</i>th expected output of the TestPlan
     */
    public Records getExpectedOutput(final int number, final TypeConfig<T> typeConfig) {
        final GenericDataSinkBase<T> sink = this.getDataSinks().get(number);
        return this.getExpectedOutput(sink, typeConfig);
    }

    /**
     * Returns the expected output {@link GenericTestRecords} of the first sink of the TestPlan. If multiple
     * contracts are tested in the TestPlan, {@link #getExpectedOutput(GenericDataSinkBase, TypeConfig)} identifies
     * the output unambiguously and should be preferred.
     * 
     * @param typeConfig
     *        the TypeConfig that should be used to create a new GenericTestRecords if needed
     * @return the first expected output of the TestPlan
     */
    public Records getExpectedOutput(final TypeConfig<T> typeConfig) {
        final int firstSink = 0;
        return this.getExpectedOutput(firstSink, typeConfig);
    }

    /**
     * Returns the input {@link GenericTestRecords} of the first source of the TestPlan. If multiple contracts are
     * tested in the TestPlan, {@link #getInput(Operator)} identifies the input unambiguously and should be
     * preferred.
     * 
     * @return the first input of the TestPlan
     */
    public Records getInput() {
        final int firstSource = 0;
        return this.getInput(firstSource);
    }

    /**
     * Returns the input {@link GenericTestRecords} that belong to the given source without changing their
     * {@link TypeConfig}. Addressing the source directly is the recommended way to set input records in more
     * complex TestPlans.
     * 
     * @param source
     *        the source of which the associated input GenericTestRecords should be
     *        returned
     * @return the input {@link GenericTestRecords} of the TestPlan associated with the
     *         given source
     */
    public Records getInput(final Operator<?> source) {
        final TypeConfig<T> unchangedConfig = null;
        return this.getInput(source, unchangedConfig);
    }

    /**
     * Returns the input {@link GenericTestRecords} that belong to the given source. The records are created with
     * the given {@link TypeConfig} on first access; on later accesses a non-null config replaces the config of the
     * existing records.
     * 
     * @param source
     *        the source of which the associated input GenericTestRecords should be
     *        returned; must be a {@link GenericDataSourceBase} if no records exist for it yet
     * @param typeConfig
     *        the TypeConfig that should be used to create a new GenericTestRecords if needed
     * @return the input {@link GenericTestRecords} of the TestPlan associated with the
     *         given source
     */
    public Records getInput(final Operator<?> source, final TypeConfig<T> typeConfig) {
        final Records existing = this.inputs.get(source);
        if (existing == null) {
            // the cast comes first so that a non-source operator is rejected before any records are created
            final GenericDataSourceBase<?, ?> key = (GenericDataSourceBase<?, ?>) source;
            final Records created = this.createTestRecords(typeConfig);
            this.inputs.put(key, created);
            return created;
        }

        if (typeConfig != null) {
            existing.setTypeConfig(typeConfig);
        }
        return existing;
    }

    /**
     * Returns the input {@link GenericTestRecords} of the <i>i</i>th source of the TestPlan without changing their
     * {@link TypeConfig}. If multiple contracts are tested in the TestPlan, {@link #getInput(Operator)} identifies
     * the input unambiguously and should be preferred.
     * 
     * @param number
     *        the number of the input.
     * @return the <i>i</i>th input of the TestPlan
     */
    public Records getInput(final int number) {
        final TypeConfig<T> unchangedConfig = null;
        return this.getInput(number, unchangedConfig);
    }

    /**
     * Returns the input {@link GenericTestRecords} of the <i>i</i>th source of the TestPlan. If multiple contracts
     * are tested in the TestPlan, {@link #getInput(Operator, TypeConfig)} identifies the input unambiguously and
     * should be preferred.
     * 
     * @param number
     *        the number of the input.
     * @param typeConfig
     *        the TypeConfig that should be used to create a new GenericTestRecords if needed
     * @return the <i>i</i>th input of the TestPlan
     */
    public Records getInput(final int number, final TypeConfig<T> typeConfig) {
        final GenericDataSourceBase<?, ?> source = this.getDataSources().get(number);
        return this.getInput(source, typeConfig);
    }

    /**
     * Traverses the test plan and returns the first entry of {@link #getOutputsOfOperator(Operator)} for the given
     * contract.
     * 
     * @param contract
     *        the contract for which one of the contracts that process its data should be returned
     * @return the first contract found that processes the data of the given contract
     * @throws ArrayIndexOutOfBoundsException
     *         if the traversal does not encounter the given contract
     */
    public Operator<?> getOutputOfOperator(final Operator<?> contract) {
        final Operator<?>[] consumers = this.getOutputsOfOperator(contract);
        return consumers[0];
    }

    /**
     * Traverses the test plan, starting at every sink, and returns all contracts that process the data of the
     * given contract, i.e. its direct consumers.<br>
     * A consumer is reported once for every path on which the contract is encountered. If the given contract is
     * itself a sink, the corresponding entry is <code>null</code> because nothing consumes its data.
     * 
     * @param contract
     *        the contract of which the consuming contracts should be returned
     * @return all contracts that process the data of the given contract
     */
    public Operator<?>[] getOutputsOfOperator(final Operator<?> contract) {
        final List<Operator<?>> outputs = new ArrayList<Operator<?>>();

        for (final Operator<?> sink : this.sinks)
            sink.accept(new Visitor<Operator<?>>() {
                // the operators on the path from the sink to the operator that is currently visited
                private final LinkedList<Operator<?>> outputStack = new LinkedList<Operator<?>>();

                @Override
                public void postVisit(final Operator<?> visitable) {
                    // leave the operator again; otherwise already finished sibling subtrees would stay on the
                    // stack and be reported as consumers of operators visited later
                    this.outputStack.pollFirst();
                }

                @Override
                public boolean preVisit(final Operator<?> visitable) {
                    // the top of the stack is the operator from which the traversal descended
                    if (visitable == contract)
                        outputs.add(this.outputStack.peek());
                    this.outputStack.push(visitable);
                    return true;
                }
            });

        return outputs.toArray(new Operator<?>[outputs.size()]);
    }

    /**
     * Returns all {@link GenericDataSinkBase}s of this test plan. The returned list is the internal list of the
     * plan, not a copy.
     * 
     * @return the sinks
     */
    public List<GenericDataSinkBase<T>> getSinks() {
        final List<GenericDataSinkBase<T>> allSinks = this.sinks;
        return allSinks;
    }

    /**
     * Returns all {@link GenericDataSourceBase}s of this test plan. The returned list is the internal list of the
     * plan, not a copy.
     * 
     * @return the sources
     */
    public List<GenericDataSourceBase<?, ?>> getSources() {
        final List<GenericDataSourceBase<?, ?>> allSources = this.sources;
        return allSources;
    }

    /**
     * Compiles the test plan to a {@link Plan}, executes it with the {@link LocalExecutor} and compares the actual
     * output values with the expected values that have been specified.<br>
     * Every exception during compilation or execution is reported through {@link Assert#fail(String)}. All
     * {@link GenericTestRecords} of this plan are closed afterwards, no matter whether the run succeeded or
     * failed.
     */
    public void run() {
        try {
            try {
                final Plan plan = this.buildPlanWithReadableSinks();
                this.syncDegreeOfParallelism(plan);
                this.initAdhocInputs();

                // NOTE(review): presumably keeps 20 MiB of heap occupied while the plan is executed so that the
                // executor cannot claim it; the trace statement below keeps the array reachable - confirm
                byte[] reservedSpace = new byte[20 * 1024 * 1024];
                LocalExecutor.execute(plan);
                LOG.trace("Reserving " + reservedSpace.length + " bytes for Sopremo");
            } catch (final Exception e) {
                Assert.fail("plan scheduling: " + e.getMessage() + "\n" + StringUtils.stringifyException(e));
            }

            this.validateResults();
        } finally {
            // release the records even if the execution or the validation failed
            try {
                this.close();
            } catch (final IOException e) {
                // closing is best effort and must not hide a test failure, but it should not vanish silently
                LOG.warn("Could not close all test records", e);
            }
        }
    }

    /**
     * Sets the degree of parallelism that {@link #run()} assigns to operators without an explicit degree.
     * 
     * @param degreeOfParallelism
     *        the degreeOfParallelism to set
     */
    public void setDegreeOfParallelism(final int degreeOfParallelism) {
        final int requestedDegree = degreeOfParallelism;
        this.degreeOfParallelism = requestedDegree;
    }

    /**
     * Creates the executable {@link Plan} from the given sinks. Subclasses may override this method to create a
     * specialized plan.
     * 
     * @param wrappedSinks
     *        the sinks
     * @return the plan
     */
    protected Plan createPlan(final Collection<GenericDataSinkBase<T>> wrappedSinks) {
        final Plan plan = new Plan(wrappedSinks);
        return plan;
    }

    /**
     * Creates the concrete implementation of {@link GenericTestRecords} for this plan. The method is invoked
     * whenever input, actual output or expected output records are requested for the first time.
     * 
     * @param typeConfig
     *        the {@link TypeConfig} to use; <code>null</code> is passed when the records are requested without
     *        an explicit configuration, e.g. through {@link #getActualOutput(GenericDataSinkBase)}
     * @return the test records
     */
    protected abstract Records createTestRecords(final TypeConfig<T> typeConfig);

    /**
     * Returns the {@link TypeConfig} that is used if no specific config is given.
     * 
     * @return the defaultConfig
     */
    protected TypeConfig<T> getDefaultConfig() {
        final TypeConfig<T> config = this.defaultConfig;
        return config;
    }

    /**
     * Replaces the {@link TypeConfig} that is used if no specific config is given.
     * 
     * @param defaultConfig
     *        the defaultConfig to set
     * @throws NullPointerException
     *         if the given config is <code>null</code>
     */
    protected void setDefaultConfig(final TypeConfig<T> defaultConfig) {
        if (defaultConfig != null) {
            this.defaultConfig = defaultConfig;
            return;
        }

        throw new NullPointerException("defaultConfig must not be null");
    }

    /**
     * Builds the plan and makes sure that the output of every sink can be read back without additional
     * knowledge. Currently the {@link SequentialOutputFormat} is used for a guaranteed deserializable output.<br>
     * If a sink does not use {@link SequentialOutputFormat}, the original sink is kept and an additional default
     * sink (see {@link #createDefaultSink(String, TypeConfig)}) is attached to the same input; the actual output
     * records of the original sink are then loaded from the file of the additional sink.<br>
     * If a sink already uses {@link SequentialOutputFormat}, its actual output records are loaded from its own
     * file and the type serializer is added to the parameters of the sink.
     * 
     * @return the plan created by {@link #createPlan(Collection)} from the original and the additional sinks
     */
    private Plan buildPlanWithReadableSinks() {
        final Collection<GenericDataSinkBase<T>> existingSinks = this.getDataSinks();
        final Collection<GenericDataSinkBase<T>> wrappedSinks = new ArrayList<GenericDataSinkBase<T>>();
        for (final GenericDataSinkBase<T> fileSink : existingSinks) {
            // configuration with which the written file is read back into the actual output records
            final Configuration inputConfig = new Configuration();
            // creates the actual output records with the default config if they do not exist yet
            final TypeConfig<T> typeConfig = this.getActualOutput(fileSink, this.defaultConfig).getTypeConfig();
            SequentialInputFormat.configureSequentialFormat(inputConfig)
                    .typeSerializer(typeConfig.getTypeSerializerFactory());
            // need a format which is deserializable without configuration
            if (!fileSink.getFormatWrapper().getUserCodeClass().equals(SequentialOutputFormat.class)) {
                final Records expectedValues = this.expectedOutputs.get(fileSink);

                // additional sink that writes the same data in the sequential format to a temporary file
                final FileDataSinkBase<T> safeSink = createDefaultSink(fileSink.getName(), typeConfig);

                safeSink.setInput(fileSink.getInput());

                // both sinks become part of the plan
                wrappedSinks.add(fileSink);
                wrappedSinks.add(safeSink);

                // only add to expected outputs if we need to check for values
                if (expectedValues != null)
                    this.expectedOutputs.put(safeSink, expectedValues);
                // the additional sink shares the actual output records with the original sink
                this.actualOutputs.put(safeSink, this.getActualOutput(fileSink));
                this.getActualOutput(fileSink).load(SequentialInputFormat.class, safeSink.getFilePath(),
                        inputConfig);

            } else {
                wrappedSinks.add(fileSink);
                // NOTE(review): the cast assumes that every sink with SequentialOutputFormat is a
                // FileDataSinkBase - confirm
                this.getActualOutput(fileSink).load(SequentialInputFormat.class,
                        ((FileDataSinkBase<T>) fileSink).getFilePath(), inputConfig);
                // make sure that the type serializer is set
                final Configuration outputConfig = new Configuration();
                SequentialOutputFormat.configureSequentialFormat(outputConfig)
                        .typeSerializer(typeConfig.getTypeSerializerFactory());
                fileSink.getParameters().addAll(outputConfig);
            }
        }

        return this.createPlan(wrappedSinks);
    }

    /**
     * Sets the open timeout of all file-based formats to 0: the {@link FileOutputFormat}s of the sinks first and
     * the {@link FileInputFormat}s of the sources afterwards. Formats of other kinds are left untouched.
     */
    private void configureSinksAndSources() {
        for (final GenericDataSinkBase<T> dataSink : this.sinks) {
            final boolean writesFile = dataSink.getFormatWrapper().getUserCodeObject() instanceof FileOutputFormat<?>;
            if (writesFile) {
                ((FileOutputFormat<?>) dataSink.getFormatWrapper().getUserCodeObject()).setOpenTimeout(0);
            }
        }

        for (final GenericDataSourceBase<?, ?> dataSource : this.sources) {
            final boolean readsFile = dataSource.getFormatWrapper().getUserCodeObject() instanceof FileInputFormat<?>;
            if (readsFile) {
                ((FileInputFormat<?>) dataSource.getFormatWrapper().getUserCodeObject()).setOpenTimeout(0);
            }
        }
    }

    /**
     * Traverses the plan, starting at every contract, and registers all sinks and sources that have not been
     * registered yet. Afterwards the input records of every source are set up to load from the format and the
     * parameters of the source, and additionally from its file path for file sources.
     */
    @SuppressWarnings({ "unchecked", "rawtypes" })
    private void findSinksAndSources() {
        // the visitor keeps no state of its own, so one instance serves all contracts
        final Visitor<Operator<?>> collector = new Visitor<Operator<?>>() {
            @Override
            public boolean preVisit(final Operator<?> visitable) {
                final List<GenericDataSinkBase<T>> knownSinks = GenericTestPlan.this.sinks;
                if (visitable instanceof GenericDataSinkBase && !knownSinks.contains(visitable)) {
                    knownSinks.add((GenericDataSinkBase<T>) visitable);
                }

                final List<GenericDataSourceBase<?, ?>> knownSources = GenericTestPlan.this.sources;
                if (visitable instanceof GenericDataSourceBase<?, ?> && !knownSources.contains(visitable)) {
                    knownSources.add((GenericDataSourceBase<?, ?>) visitable);
                }
                return true;
            }

            @Override
            public void postVisit(final Operator<?> visitable) {
            }
        };
        for (final Operator<?> contract : this.contracts) {
            contract.accept(collector);
        }

        for (final GenericDataSourceBase<?, ?> source : this.sources) {
            final Records sourceRecords = this.getInput(source);
            if (source instanceof FileDataSourceBase) {
                sourceRecords.load(
                        (Class<? extends FileInputFormat>) source.getFormatWrapper().getUserCodeClass(),
                        ((FileDataSourceBase) source).getFilePath(), source.getParameters());
            } else {
                sourceRecords.load(
                        (Class<? extends GenericInputFormat>) source.getFormatWrapper().getUserCodeClass(),
                        source.getParameters());
            }
        }
    }

    /**
     * Returns the internal list of all sinks of this plan.
     * 
     * @return the sinks
     */
    private List<GenericDataSinkBase<T>> getDataSinks() {
        final List<GenericDataSinkBase<T>> dataSinks = this.sinks;
        return dataSinks;
    }

    /**
     * Returns the internal list of all sources of this plan.
     * 
     * @return the sources
     */
    private List<? extends GenericDataSourceBase<?, ?>> getDataSources() {
        final List<? extends GenericDataSourceBase<?, ?>> dataSources = this.sources;
        return dataSources;
    }

    /**
     * Prepares the inputs of all sources for the execution: sources that use the {@link SequentialInputFormat}
     * receive the type serializer of their input records, and ad-hoc input records of file sources are saved to
     * the file of the source.
     * 
     * @throws IOException
     *         if ad-hoc records cannot be saved
     */
    private void initAdhocInputs() throws IOException {
        for (final GenericDataSourceBase<?, ?> dataSource : this.sources) {
            final Records records = this.getInput(dataSource, this.defaultConfig);

            final boolean sequential = dataSource.getFormatWrapper().getUserCodeClass()
                    .equals(SequentialInputFormat.class);
            if (sequential) {
                SequentialInputFormat.configureSequentialFormat(dataSource)
                        .typeSerializer(records.getTypeConfig().getTypeSerializerFactory());
            }

            // only ad-hoc values of file sources need to be materialized
            if (!records.isAdhoc() || !(dataSource instanceof FileDataSourceBase)) {
                continue;
            }
            records.saveToFile(((FileDataSourceBase<?>) dataSource).getFilePath());
        }
    }

    /**
     * Sets the degree of parallelism for every node in the plan that does not specify one itself (degree -1).
     * Sources always receive a degree of 1, all other nodes the degree of this test plan.<br>
     * If the degree of the test plan is greater than 1, the output path of every file sink that currently points
     * to a plain file is replaced by a directory. Failures of this preparation are logged and do not abort the
     * traversal.
     * 
     * @param plan
     *        the plan whose nodes are configured
     */
    private void syncDegreeOfParallelism(final Plan plan) {
        plan.accept(new Visitor<Operator<?>>() {

            @Override
            public void postVisit(final Operator<?> visitable) {
            }

            @Override
            public boolean preVisit(final Operator<?> visitable) {
                int degree = GenericTestPlan.this.getDegreeOfParallelism();
                if (visitable instanceof GenericDataSourceBase<?, ?>)
                    degree = 1;
                else if (degree > 1 && visitable instanceof FileDataSinkBase)
                    try {
                        final Path path = new Path(((FileDataSinkBase<?>) visitable).getFilePath());

                        final FileSystem fs = path.getFileSystem();

                        final FileStatus f = fs.getFileStatus(path);

                        // replace a plain file by a directory of the same name
                        if (!f.isDir()) {
                            fs.delete(path, false);
                            fs.mkdirs(path);
                        }
                    } catch (final IOException e) {
                        // best effort: report through the logger of this class instead of stderr
                        LOG.warn("Cannot turn the output path of " + visitable + " into a directory", e);
                    }
                if (visitable.getDegreeOfParallelism() == -1)
                    visitable.setDegreeOfParallelism(degree);
                return true;
            }
        });
    }

    /**
     * Compares the actual output of every sink of this plan with its expected output. A sink is only checked if
     * it uses the {@link SequentialOutputFormat} and initialized expected values exist for it. The actual output
     * records of a checked sink are closed after the comparison.
     * 
     * @throws AssertionError
     *         if the actual values differ from the expected values; the message is prefixed with the name of the
     *         sink and the original error is attached as cause
     */
    private void validateResults() {
        for (final GenericDataSinkBase<T> sinkOperator : this.getDataSinks()) {
            final Records expectedValues = this.expectedOutputs.get(sinkOperator);
            // need a format which is deserializable without configuration
            if (sinkOperator.getFormatWrapper().getUserCodeClass() == (Class<?>) SequentialOutputFormat.class
                    && expectedValues != null && expectedValues.isInitialized()) {
                final Records actualValues = this.getActualOutput(sinkOperator);

                try {
                    actualValues.assertEquals(expectedValues);
                } catch (final AssertionError e) {
                    final AssertionError assertionError = new AssertionError(
                            sinkOperator.getName() + ": " + e.getMessage());
                    // keep the original error, including its stack trace and its own cause, in the chain
                    assertionError.initCause(e);
                    throw assertionError;
                } finally {
                    actualValues.close();
                }
            }
        }
    }

    /**
     * Creates a default sink with the given name that writes to a fresh temporary test plan file using the
     * {@link SequentialOutputFormat}. The sink may be used with ad-hoc values added to the corresponding
     * {@link GenericTestRecords}.
     * 
     * @param name
     *        the name of the sink
     * @param typeConfig
     *        the {@link TypeConfig} whose type serializer is configured for the {@link SequentialOutputFormat}
     * @return the created sink
     */
    public static <T> FileDataSinkBase<T> createDefaultSink(final String name, final TypeConfig<T> typeConfig) {
        @SuppressWarnings("unchecked")
        final FileDataSinkBase<T> defaultSink = new FileDataSinkBase<T>(
                (FileOutputFormat<T>) new SequentialOutputFormat(), typeConfig.getSink(), getTestPlanFile("output"),
                name);

        // the format needs the serializer to write the records
        SequentialOutputFormat.configureSequentialFormat(defaultSink)
                .typeSerializer(typeConfig.getTypeSerializerFactory());

        return defaultSink;
    }

    /**
     * Creates a default source with the given name that reads from a fresh temporary test plan file using the
     * {@link SequentialInputFormat}. The source may be used with ad-hoc values added to the corresponding
     * {@link GenericTestRecords}.
     * 
     * @param name
     *        the name of the source
     * @param typeConfig
     *        the {@link TypeConfig} whose type serializer is configured for the {@link SequentialInputFormat}
     * @return the created source
     */
    @SuppressWarnings({ "unchecked", "rawtypes" })
    public static FileDataSourceBase createDefaultSource(final String name, final TypeConfig<?> typeConfig) {
        final FileDataSourceBase defaultSource = new FileDataSourceBase(
                new SequentialInputFormat(), typeConfig.getSource(), getTestPlanFile("input"), name);

        // the format needs the serializer to read the records
        SequentialInputFormat.configureSequentialFormat(defaultSource)
                .typeSerializer(typeConfig.getTypeSerializerFactory());

        return defaultSource;
    }

    /**
     * Creates a temporary file for a test plan and returns its location.
     * 
     * @param prefix
     *        the role of the file, e.g. "input" or "output"; despite the parameter name, the value ends up at the
     *        end of the file name, which always starts with "testPlan"
     * @return the URI of the created file as string
     */
    static String getTestPlanFile(final String prefix) {
        return createTemporaryFile("testPlan", prefix);
    }

    /**
     * Creates an empty temporary file that is deleted when the virtual machine terminates. The parameters are
     * passed to {@link File#createTempFile(String, String)} in the same order.
     * 
     * @param prefix
     *        the beginning of the file name
     * @param suffix
     *        the end of the file name
     * @return the URI of the created file as string
     * @throws IllegalStateException
     *         if the file cannot be created
     */
    private static String createTemporaryFile(final String prefix, final String suffix) {
        try {
            final File tempFile = File.createTempFile(prefix, suffix);
            tempFile.deleteOnExit();
            return tempFile.toURI().toString();
        } catch (final IOException e) {
            throw new IllegalStateException(
                    "Cannot create temporary file with prefix " + prefix + " and suffix " + suffix, e);
        }
    }
}