org.pentaho.di.cluster.PartitioningTest.java Source code

Java tutorial

Introduction

Here is the source code for org.pentaho.di.cluster.PartitioningTest.java

Source

/*! ******************************************************************************
 *
 * Pentaho Data Integration
 *
 * Copyright (C) 2002-2013 by Pentaho : http://www.pentaho.com
 *
 *******************************************************************************
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 ******************************************************************************/

package org.pentaho.di.cluster;

import org.apache.commons.vfs.FileObject;
import org.pentaho.di.core.logging.LogChannel;
import org.pentaho.di.core.vfs.KettleVFS;
import org.pentaho.di.trans.Trans;
import org.pentaho.di.trans.TransExecutionConfiguration;
import org.pentaho.di.trans.TransMeta;
import org.pentaho.di.trans.cluster.TransSplitter;

public class PartitioningTest extends BaseCluster {

    /**
     * Expected row counts, one per partition file, for the two 12-partition tests. Both KTRs
     * partition the same input on id into 12 partitions, so the distribution is identical.
     */
    private static final String[] TWELVE_PARTITION_RESULTS = { "8", "9", "9", "9", "9", "8", "8", "8", "8", "8",
            "8", "8", };

    /** Partition file name suffixes ("000".."011") for the 12-partition tests. */
    private static final String[] TWELVE_PARTITION_FILES = { "000", "001", "002", "003", "004", "005", "006",
            "007", "008", "009", "010", "011", };

    /**
     * Loads and modifies the given test transformation for the generated cluster, executes it
     * clustered, monitors it until completion and asserts that no errors occurred.
     *
     * @param clusterGenerator the generator whose slave servers run the transformation
     * @param ktrFilename path to the .ktr transformation file to execute
     * @param testName name of the calling test, used to label the monitoring log channel
     * @return the loaded transformation meta, for variable substitution by the caller
     * @throws Exception if loading, execution or monitoring fails
     */
    private TransMeta executeClusteredTransformation(ClusterGenerator clusterGenerator, String ktrFilename,
            String testName) throws Exception {
        TransMeta transMeta = loadAndModifyTestTransformation(clusterGenerator, ktrFilename);
        TransExecutionConfiguration config = createClusteredTransExecutionConfiguration();
        TransSplitter transSplitter = Trans.executeClustered(transMeta, config);
        // Label the log channel with the actual test name (the original code reused the
        // name of an unrelated test in all three methods, which made cluster logs misleading).
        long nrErrors = Trans.monitorClusteredTransformation(
                new LogChannel("cluster unit test <" + testName + ">"), transSplitter, null, 1);
        assertEquals(0L, nrErrors);
        return transMeta;
    }

    /**
     * Asserts that the given output file has the expected content (ignoring whitespace and case),
     * then deletes it so the test leaves no clutter behind in the temp directory.
     *
     * @param transMeta used to substitute variables (e.g. ${java.io.tmpdir}) in the filename
     * @param filename output file name, possibly containing Kettle variables
     * @param expected expected file content
     * @throws Exception if the file cannot be read or deleted
     */
    private void assertFileContentAndDelete(TransMeta transMeta, String filename, String expected)
            throws Exception {
        String result = loadFileContent(transMeta, filename);
        assertEqualsIgnoreWhitespacesAndCase(expected, result);
        FileObject file = KettleVFS.getFileObject(transMeta.environmentSubstitute(filename));
        file.delete();
    }

    /**
     * This test reads a CSV file in parallel on the cluster, one copy per slave.<br>
     * It then partitions the data on id in 12 partitions (4 per slave) and keeps the data partitioned until written to
     * file.<br>
     * As such we expect 12 files on disk.<br>
     * File: "partitioning-swimming-lanes-on-cluster.ktr"<br>
     */
    public void testPartitioningSwimmingLanesOnCluster() throws Exception {
        init();

        ClusterGenerator clusterGenerator = new ClusterGenerator();
        try {
            clusterGenerator.launchSlaveServers();

            TransMeta transMeta = executeClusteredTransformation(clusterGenerator,
                    "test/org/pentaho/di/cluster/partitioning-swimming-lanes-on-cluster.ktr",
                    "testPartitioningSwimmingLanesOnCluster");

            // Verify each of the 12 partition files and remove it afterwards.
            for (int i = 0; i < TWELVE_PARTITION_RESULTS.length; i++) {
                String filename = "${java.io.tmpdir}/partitioning-swimming-lanes-on-cluster-"
                        + TWELVE_PARTITION_FILES[i] + ".txt";
                assertFileContentAndDelete(transMeta, filename, TWELVE_PARTITION_RESULTS[i]);
            }

        } catch (Exception e) {
            e.printStackTrace();
            fail(e.toString());
        } finally {
            try {
                clusterGenerator.stopSlaveServers();
            } catch (Exception e) {
                e.printStackTrace();
                fail(e.toString());
            }
        }
    }

    /**
     * This test reads a CSV file in parallel on the cluster, one copy per slave.<br>
     * It then partitions the data on id in 12 partitions (4 per slave).<br>
     * After that it re-partitions the data in 9 partitions (3 per slave).<br>
     * As such we expect 9 result files on disk.<br>
     * File: "partitioning-repartitioning-on-cluster.ktr"<br>
     */
    public void testPartitioningRepartitioningOnCluster() throws Exception {
        init();

        ClusterGenerator clusterGenerator = new ClusterGenerator();
        try {
            clusterGenerator.launchSlaveServers();

            TransMeta transMeta = executeClusteredTransformation(clusterGenerator,
                    "test/org/pentaho/di/cluster/partitioning-repartitioning-on-cluster.ktr",
                    "testPartitioningRepartitioningOnCluster");

            // NOTE(review): the Javadoc above expects 9 result files, but the assertions check
            // 12 files with the same distribution as the swimming-lanes test — confirm against
            // the KTR whether the doc or the golden data is stale.
            for (int i = 0; i < TWELVE_PARTITION_RESULTS.length; i++) {
                String filename = "${java.io.tmpdir}/partitioning-repartitioning-on-cluster-"
                        + TWELVE_PARTITION_FILES[i] + ".txt";
                assertFileContentAndDelete(transMeta, filename, TWELVE_PARTITION_RESULTS[i]);
            }

        } catch (Exception e) {
            e.printStackTrace();
            fail(e.toString());
        } finally {
            try {
                clusterGenerator.stopSlaveServers();
            } catch (Exception e) {
                e.printStackTrace();
                fail(e.toString());
            }
        }
    }

    /**
     * Same as testPartitioningRepartitioningOnCluster() but passing the data to a non-partitioned step on the master.
     *
     * File: "partitioning-repartitioning-on-cluster3.ktr"<br>
     */
    public void testPartitioningRepartitioningOnCluster3() throws Exception {
        init();

        ClusterGenerator clusterGenerator = new ClusterGenerator();
        try {
            clusterGenerator.launchSlaveServers();

            TransMeta transMeta = executeClusteredTransformation(clusterGenerator,
                    "test/org/pentaho/di/cluster/partitioning-repartitioning-on-cluster3.ktr",
                    "testPartitioningRepartitioningOnCluster3");

            String goldenData = "0;16\n1;17\n2;17\n3;17\n4;17\n5;16";
            String filename = "${java.io.tmpdir}/partitioning-repartitioning-on-cluster3.txt";
            // Verify and delete the single output file. The delete was previously commented out,
            // which leaked the file into the temp directory on every run, unlike the other tests.
            assertFileContentAndDelete(transMeta, filename, goldenData);
        } catch (Exception e) {
            e.printStackTrace();
            fail(e.toString());
        } finally {
            try {
                clusterGenerator.stopSlaveServers();
            } catch (Exception e) {
                e.printStackTrace();
                fail(e.toString());
            }
        }
    }
}