at.illecker.hama.hybrid.examples.matrixmultiplication2.MatrixMultiplicationHybridBenchmark.java Source code

Introduction

Here is the source code for at.illecker.hama.hybrid.examples.matrixmultiplication2.MatrixMultiplicationHybridBenchmark.java

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package at.illecker.hama.hybrid.examples.matrixmultiplication2;

import java.io.IOException;
import java.util.List;
import java.util.Random;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.hama.bsp.BSPJob;

import com.google.caliper.Benchmark;
import com.google.caliper.Param;
import com.google.caliper.api.Macrobenchmark;
import com.google.caliper.runner.CaliperMain;

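// Caliper benchmark for the hybrid CPU + GPU matrix multiplication BSP job
// (MatrixMultiplicationHybridBSP). The number of BSP tasks is varied via
// @Param; the run with the maximum task count additionally uses one GPU task
// that processes 95% of the input data.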
public class MatrixMultiplicationHybridBenchmark extends Benchmark {

    // @Param({ "256", "512", "768", "1024", "1280", "1536", "1792", "2048" })
    private int n = 2048;

    // @Param
    // CalcType type;

    public enum CalcType {
        CPU, GPU
    };

    @Param({ "1", "2", "3", "4", "5" })
    private int bspTaskNum; // = 5;
    private final int maxTaskNum = 5;

    // GPU percentage of the input data
    // @Param({ "20", "30", "40", "50", "60", "70", "80", "90", "95" })
    private int GPUWorkload = 0;

    private static final int TILE_WIDTH = 32; // max blockSize is 1024 (32 x 32)

    private static final Path CONF_TMP_DIR = new Path(
            "output/hama/hybrid/examples/matrixmultiplication/bench-" + System.currentTimeMillis());
    private static final Path CONF_INPUT_DIR = new Path(CONF_TMP_DIR, "input");
    private static final Path CONF_OUTPUT_DIR = new Path(CONF_TMP_DIR, "output");

    private static final Path MATRIX_A_SPLITS_PATH = new Path(CONF_INPUT_DIR + "/matrixAsplits/");
    private static final Path MATRIX_B_TRANSPOSED_PATH = new Path(CONF_INPUT_DIR + "/transposedMatrixB");
    private static final Path MATRIX_A_PATH = new Path(CONF_INPUT_DIR + "/matrixA");
    private static final Path MATRIX_B_PATH = new Path(CONF_INPUT_DIR + "/matrixB");
    private static final Path MATRIX_C_PATH = new Path(CONF_OUTPUT_DIR + "/matrixC");
    private static final Path MATRIX_D_PATH = new Path(CONF_OUTPUT_DIR + "/matrixD");

    private Configuration m_conf = null;
    private boolean m_runLocally = false;
    private int m_numBspTask;
    private int m_numGpuBspTask;
    private List<Path> m_transposedMatrixBPaths;
    private DistributedRowMatrix m_matrixC = null;

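    // Caliper setUp: loads the Hadoop and Hama configurations if the corresponding
    // home directories are set (falling back to a local run when HDFS is not
    // reachable), configures the CPU/GPU task counts and generates the input matrices.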
    @Override
    protected void setUp() throws Exception {
        m_conf = new Configuration();

        // Try to load Hadoop configuration
        String HADOOP_HOME = System.getenv("HADOOP_HOME");
        String HADOOP_INSTALL = System.getenv("HADOOP_INSTALL");
        if (((HADOOP_HOME != null) || (HADOOP_INSTALL != null)) && (!m_runLocally)) {
            String HADOOP = ((HADOOP_HOME != null) ? HADOOP_HOME : HADOOP_INSTALL);

            m_conf.addResource(new Path(HADOOP, "src/core/core-default.xml"));
            m_conf.addResource(new Path(HADOOP, "src/hdfs/hdfs-default.xml"));
            m_conf.addResource(new Path(HADOOP, "src/mapred/mapred-default.xml"));
            m_conf.addResource(new Path(HADOOP, "conf/core-site.xml"));
            m_conf.addResource(new Path(HADOOP, "conf/hdfs-site.xml"));
            m_conf.addResource(new Path(HADOOP, "conf/mapred-site.xml"));
            // System.out.println("Loaded Hadoop configuration from " + HADOOP);

            try {
                // Connect to HDFS Filesystem
                FileSystem.get(m_conf);
            } catch (Exception e) {
                // HDFS not reachable run Benchmark locally
                m_conf = new Configuration();
                m_runLocally = true;
            }
            // System.out.println("Run Benchmark local: " + m_runLocally);
        }

        // Try to load Hama configuration
        String HAMA_HOME = System.getenv("HAMA_HOME");
        String HAMA_INSTALL = System.getenv("HAMA_INSTALL");
        if (((HAMA_HOME != null) || (HAMA_INSTALL != null)) && (!m_runLocally)) {
            String HAMA = ((HAMA_HOME != null) ? HAMA_HOME : HAMA_INSTALL);

            m_conf.addResource(new Path(HAMA, "conf/hama-default.xml"));
            m_conf.addResource(new Path(HAMA, "conf/hama-site.xml"));
            // System.out.println("Loaded Hama configuration from " + HAMA);
        }

        // CPU vs GPU benchmark
        int numGpuBspTask = 0;
        // if (type == CalcType.GPU) {
        // bspTaskNum = 1;
        // numGpuBspTask = 1;
        // GPUWorkload = 100;
        // }

        // CPU + GPU Hybrid benchmark
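        // only the run with the maximum number of BSP tasks uses the GPU,
        // which then processes 95% of the input data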
        if (bspTaskNum == maxTaskNum) {
            numGpuBspTask = 1;
            GPUWorkload = 95;
        } else {
            numGpuBspTask = 0;
        }

        // Set CPU tasks
        m_conf.setInt("bsp.peers.num", bspTaskNum);
        m_numBspTask = bspTaskNum;
        // Set GPU tasks
        m_conf.setInt("bsp.peers.gpu.num", numGpuBspTask);
        m_numGpuBspTask = numGpuBspTask;

        m_conf.setBoolean("hama.pipes.logging", false);

        // Generate input matrix A and transposed matrix B
        prepareInput();

        // Debug output
        // System.out.println("CalcType: " + type);
        System.out.println("CONF_TMP_DIR: " + CONF_TMP_DIR.toString());
        System.out.println("NumBspTask: " + m_conf.getInt("bsp.peers.num", 0) + " NumGpuBspTask: "
                + m_conf.getInt("bsp.peers.gpu.num", 0));
        System.out.println("n: " + n + " GPUWorkload: " + GPUWorkload + "%");
    }

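    // Caliper tearDown: deletes the temporary benchmark directory;
    // result verification and output printing are disabled by default.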
    @Override
    protected void tearDown() throws Exception {
        // skip verification
        // verify();

        // Cleanup
        FileSystem fs = FileSystem.get(m_conf);
        fs.delete(CONF_TMP_DIR, true);

        // printOutput(m_conf);
    }

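    // Creates the random input: matrix A is written as splits distributed over the
    // CPU and GPU tasks according to GPUWorkload, matrix B is written in transposed
    // row order. Constant seeds keep the data identical across benchmark runs.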
    private void prepareInput() throws Exception {
        // Create random DistributedRowMatrix
        // use constant seeds to get reproducible results
        // Matrix A
        DistributedRowMatrix.createRandomDistributedRowMatrix(m_conf, n, n, new Random(42L), MATRIX_A_SPLITS_PATH,
                false, m_numBspTask, m_numGpuBspTask, GPUWorkload);

        // Matrix B is stored in transposed order
        m_transposedMatrixBPaths = DistributedRowMatrix.createRandomDistributedRowMatrix(m_conf, n, n,
                new Random(1337L), MATRIX_B_TRANSPOSED_PATH, true);
    }

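    // Optional check: rebuilds matrix A and a non-transposed matrix B from the same
    // seeds, multiplies them with the Java reference implementation and compares
    // the result (matrix D) against the BSP result (matrix C).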
    private void verify() throws Exception {
        // Create matrix A in one file for verification
        List<Path> matrixAPaths = DistributedRowMatrix.createRandomDistributedRowMatrix(m_conf, n, n,
                new Random(42L), MATRIX_A_PATH, false);
        DistributedRowMatrix matrixA = new DistributedRowMatrix(matrixAPaths.get(0), CONF_INPUT_DIR, n, n);
        matrixA.setConf(m_conf);

        // Create matrix B, NOT transposed for verification
        List<Path> matrixBPaths = DistributedRowMatrix.createRandomDistributedRowMatrix(m_conf, n, n,
                new Random(1337L), MATRIX_B_PATH, false);
        DistributedRowMatrix matrixB = new DistributedRowMatrix(matrixBPaths.get(0), CONF_INPUT_DIR, n, n);
        matrixB.setConf(m_conf);

        // Verification
        DistributedRowMatrix matrixD = matrixA.multiplyJava(matrixB, MATRIX_D_PATH);
        if (m_matrixC.verify(matrixD)) {
            System.out.println("Verify PASSED!");
        } else {
            System.out.println("Verify FAILED!");
        }
    }

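    // Debug helper: prints every non-empty file in the output directory to stdout.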
    static void printOutput(Configuration conf) throws IOException {
        FileSystem fs = CONF_OUTPUT_DIR.getFileSystem(conf);
        FileStatus[] files = fs.listStatus(CONF_OUTPUT_DIR);
        for (int i = 0; i < files.length; i++) {
            if (files[i].getLen() > 0) {
                System.out.println("File " + files[i].getPath());
                FSDataInputStream in = fs.open(files[i].getPath());
                IOUtils.copyBytes(in, System.out, conf, false);
                in.close();
            }
        }
        // fs.delete(FileOutputFormat.getOutputPath(job), true);
    }

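    // Caliper macrobenchmark: each timed repetition executes the complete BSP job.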
    @Macrobenchmark
    public void timeCalculate() {
        doBenchmark();
    }

    public void doBenchmark() {
        try {
            ToolRunner.run(new MatrixMultiplication(), new String[0]);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

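    // Hadoop Tool wrapper that submits the MatrixMultiplicationHybridBSP job,
    // renames the resulting part file to part0.seq and loads matrix C back
    // from the file system.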
    private class MatrixMultiplication extends Configured implements Tool {
        public MatrixMultiplication() {
        }

        @Override
        public int run(String[] arg0) throws Exception {
            BSPJob job = MatrixMultiplicationHybridBSP.createMatrixMultiplicationHybridBSPConf(m_conf,
                    MATRIX_A_SPLITS_PATH, m_transposedMatrixBPaths.get(0), MATRIX_C_PATH, TILE_WIDTH, false);

            long startTime = System.currentTimeMillis();

            // Execute MatrixMultiplication BSP Job
            if (job.waitForCompletion(true)) {

                // Rename result file to output path
                Path matrixCoutPath = new Path(MATRIX_C_PATH + "/part0.seq");

                FileSystem fs = MATRIX_C_PATH.getFileSystem(m_conf);
                FileStatus[] files = fs.listStatus(MATRIX_C_PATH);
                for (int i = 0; i < files.length; i++) {
                    if ((files[i].getPath().getName().startsWith("part-")) && (files[i].getLen() > 97)) {
                        fs.rename(files[i].getPath(), matrixCoutPath);
                        break;
                    }
                }

                // Read resulting Matrix from HDFS
                m_matrixC = new DistributedRowMatrix(matrixCoutPath, MATRIX_C_PATH, n, n);
                m_matrixC.setConf(m_conf);
            }

            System.out.println("MatrixMultiplicationHybrid using Hama finished in "
                    + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

            return 0;
        }
    }

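    // Runs this benchmark through Caliper's runner.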
    public static void main(String[] args) {
        CaliperMain.main(MatrixMultiplicationHybridBenchmark.class, args);
    }

}