org.cloudata.core.PerformanceTest.java Source code

Introduction

Here is the source code for org.cloudata.core.PerformanceTest.java

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.cloudata.core;

import java.io.IOException;
import java.io.PrintStream;
import java.text.DecimalFormat;
import java.text.SimpleDateFormat;
import java.util.Arrays;
import java.util.Date;
import java.util.List;
import java.util.Random;
import java.util.SortedSet;
import java.util.TreeSet;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;
import org.apache.log4j.Logger;
import org.cloudata.core.client.Cell;
import org.cloudata.core.client.DirectUploader;
import org.cloudata.core.client.CTable;
import org.cloudata.core.client.Row;
import org.cloudata.core.client.ScanCell;
import org.cloudata.core.client.scanner.ScannerFactory;
import org.cloudata.core.client.scanner.TableScanner;
import org.cloudata.core.common.conf.CloudataConf;
import org.cloudata.core.tablet.TableSchema;

/**
 * @author jindolk
 *
 */
public class PerformanceTest {
    static final Logger LOG = Logger.getLogger(PerformanceTest.class.getName());

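    // Each test row carries a single ROW_LENGTH-byte value; ONE_GB here is
    // 1024 * 1024 * 1000 bytes, so ROWS_PER_GB is the number of rows needed to
    // write roughly one gigabyte of cell data per client.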
    private static final int ROW_LENGTH = 1000;
    private static final int ONE_GB = 1024 * 1024 * 1000;
    private static final int ROWS_PER_GB = ONE_GB / ROW_LENGTH;

    private static final String COLUMN_NAME = "info";
    private static final String COLUMN_KEY = "data";
    protected static TableSchema tableInfo;
    static {
        tableInfo = new TableSchema("PerformanceTest");
        tableInfo.addColumn(COLUMN_NAME);
    }

    private static final String RANDOM_READ = "randomRead";
    private static final String RANDOM_READ_MEM = "randomReadMem";
    private static final String RANDOM_WRITE = "randomWrite";
    private static final String SEQUENTIAL_READ = "sequentialRead";
    private static final String SEQUENTIAL_WRITE = "sequentialWrite";
    private static final String SCAN = "scan";
    private static final String BATCH_UPLOAD = "batchUpload";

    private static final List<String> COMMANDS = Arrays.asList(new String[] { RANDOM_READ, RANDOM_READ_MEM,
            RANDOM_WRITE, SEQUENTIAL_READ, SEQUENTIAL_WRITE, SCAN, BATCH_UPLOAD });

    volatile Configuration conf;
    private boolean miniCluster = false;
    private int N = 1;
    private int R = ROWS_PER_GB;
    private int tabletCount = 5;
    private boolean batchUpload = false;

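    // Tablet split keys collected by checkTable() when pre-splitting the table
    // for a batch upload run.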
    private SortedSet<Row.Key> rowKeySet = new TreeSet<Row.Key>();
    private static final Path PERF_EVAL_DIR = new Path("performance_test");
    private CloudataConf cConf = new CloudataConf();

    /**
     * Regex to parse lines in input file passed to mapreduce task.
     */
    public static final Pattern LINE_PATTERN = Pattern
            .compile("startRow=(\\d+),\\s+" + "perClientRunRows=(\\d+),\\s+totalRows=(\\d+),\\s+clients=(\\d+)");

    public static final DecimalFormat df = new DecimalFormat("0000000000");

    /**
     * Enum for map metrics.  Kept out here rather than inside the Map
     * inner class so we can find associated properties.
     */
    protected static enum Counter {
        /** elapsed time */
        ELAPSED_TIME,
        /** number of rows */
        ROWS
    }

    /**
     * Constructor
     * @param c Configuration object
     */
    public PerformanceTest(final Configuration c) {
        this.conf = c;
    }

    /**
     * Implementations can have their status set.
     */
    static interface Status {
        /**
         * Sets status
         * @param msg status message
         * @throws IOException
         */
        void setStatus(final String msg) throws IOException;
    }

    /**
     * MapReduce job that runs a performance evaluation client in each map task.
     */
    @SuppressWarnings("unchecked")
    public static class EvaluationMapTask extends MapReduceBase
            implements Mapper<WritableComparable, Writable, LongWritable, Text> {
        /** configuration parameter name that contains the command */
        public final static String CMD_KEY = "EvaluationMapTask.command";
        private String cmd;
        private PerformanceTest pTest;

        /** {@inheritDoc} */
        @Override
        public void configure(JobConf j) {
            this.cmd = j.get(CMD_KEY);

            this.pTest = new PerformanceTest(j);
        }

        /** {@inheritDoc} */
        public void map(@SuppressWarnings("unused") final WritableComparable key, final Writable value,
                final OutputCollector<LongWritable, Text> output, final Reporter reporter) throws IOException {
            Matcher m = LINE_PATTERN.matcher(((Text) value).toString());
            if (m != null && m.matches()) {
                int startRow = Integer.parseInt(m.group(1));
                int perClientRunRows = Integer.parseInt(m.group(2));
                int totalRows = Integer.parseInt(m.group(3));
                Status status = new Status() {
                    public void setStatus(String msg) {
                        try {
                            reporter.setStatus(msg);
                        } catch (Exception e) {
                        }
                    }
                };
                long elapsedTime = this.pTest.runOneClient(this.cmd, startRow, perClientRunRows, totalRows, status);
                // Collect how much time the thing took.  Report as map output and
                // to the ELAPSED_TIME counter.
                reporter.incrCounter(Counter.ELAPSED_TIME, elapsedTime);
                reporter.incrCounter(Counter.ROWS, perClientRunRows);
                output.collect(new LongWritable(startRow), new Text(Long.toString(elapsedTime)));
            }
        }
    }

    /*
     * Creates the table if it does not already exist; when batchUpload is set,
     * the table is created pre-split across tabletCount ranges.
     * @return Always true once the table is known to exist.
     * @throws IOException
     */
    private boolean checkTable() throws IOException {
        if (batchUpload) {
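            // Pre-split the table: generate every row key in sorted order and
            // record every rowPerTablet-th key as a tablet boundary, so the
            // upload is spread across roughly tabletCount tablets.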
            int rowPerTablet = this.R / tabletCount;

            SortedSet<Row.Key> sortedRow = new TreeSet<Row.Key>();
            for (int i = 0; i < this.R; i++) {
                String rowKeyVal = df.format(i);
                sortedRow.add(new Row.Key(rowKeyVal));
            }

            int index = 1;
            for (Row.Key eachRowKey : sortedRow) {
                if (index != 1 && index % rowPerTablet == 0) {
                    rowKeySet.add(eachRowKey);
                    //System.out.println("RowKey Range:" + eachHashRowKey);
                }
                index++;
            }
            rowKeySet.add(Row.Key.MAX_KEY);
            //System.out.println("RowKey Range:" + HashRowKey.maxRowKey);
            CTable.createTable(cConf, tableInfo, rowKeySet.toArray(new Row.Key[rowKeySet.size()]));
        } else {
            if (!CTable.existsTable(cConf, tableInfo.getTableName())) {
                CTable.createTable(cConf, tableInfo);
                LOG.info("Table " + tableInfo + " created");
            } else {
                LOG.info("Table " + tableInfo + " exists. use existed table");
            }
        }

        try {
            Thread.sleep(5 * 1000);
        } catch (InterruptedException e) {
        }
        return true;
    }

    /*
     * Run multiple clients concurrently by setting up a MapReduce job: one map
     * per client, then a single reduce to sum the elapsed times.
     * @param cmd Command to run.
     * @throws IOException
     */
    private void runNIsMoreThanOne(final String cmd) throws IOException {
        checkTable();

        // Run a MapReduce job with as many maps as requested clients.
        // Before starting the job, write out an input file with one instruction
        // per client saying which row to start on.
        Path inputDir = writeInputFile(this.conf);
        this.conf.set(EvaluationMapTask.CMD_KEY, cmd);
        JobConf job = new JobConf(this.conf, this.getClass());
        FileInputFormat.addInputPath(job, inputDir);
        job.setInputFormat(TextInputFormat.class);
        job.setJobName("Cloudata Performance Evaluation");
        job.setMapperClass(EvaluationMapTask.class);
        job.setMaxMapAttempts(1);
        job.setMaxReduceAttempts(1);
        job.setNumMapTasks(this.N * 10); // Ten maps per client.
        job.setNumReduceTasks(1);
        job.setOutputFormat(TextOutputFormat.class);
        FileOutputFormat.setOutputPath(job, new Path(inputDir, "outputs"));
        JobClient.runJob(job);
    }

    /*
     * Write input file of offsets-per-client for the mapreduce job.
     * @param c Configuration
     * @return Directory that contains file written.
     * @throws IOException
     */
    private Path writeInputFile(final Configuration c) throws IOException {
        FileSystem fs = FileSystem.get(c);
        if (!fs.exists(PERF_EVAL_DIR)) {
            fs.mkdirs(PERF_EVAL_DIR);
        }
        SimpleDateFormat formatter = new SimpleDateFormat("yyyyMMddHHmmss");
        Path subdir = new Path(PERF_EVAL_DIR, formatter.format(new Date()));
        fs.mkdirs(subdir);
        Path inputFile = new Path(subdir, "input.txt");
        PrintStream out = new PrintStream(fs.create(inputFile));
        try {
            for (int i = 0; i < (this.N * 10); i++) {
                // Write out the start row and the number of rows per client run,
                // which is 1/10th of (R/N).
                int perClientRows = (this.R / this.N);
                out.println("startRow=" + i * perClientRows + ", perClientRunRows=" + (perClientRows / 10)
                        + ", totalRows=" + this.R + ", clients=" + this.N);
            }
        } finally {
            out.close();
        }
        return subdir;
    }

    /*
     * A test.
     * Subclass to particularize what happens per row.
     */
    static abstract class Test {
        protected final Random rand = new Random(System.currentTimeMillis());
        protected final int startRow;
        protected final int perClientRunRows;
        protected final int totalRows;
        private final Status status;
        protected CTable table;
        protected volatile Configuration conf;

        Test(final Configuration conf, final int startRow, final int perClientRunRows, final int totalRows,
                final Status status) {
            super();
            this.startRow = startRow;
            this.perClientRunRows = perClientRunRows;
            this.totalRows = totalRows;
            this.status = status;
            this.table = null;
            this.conf = conf;
        }

        /*
         * @return Generated random value to insert into a table cell.
         */
        byte[] generateValue() {
            byte[] b = new byte[ROW_LENGTH];
            rand.nextBytes(b);
            return b;
        }

        private String generateStatus(final int sr, final int i, final int lr) {
            return sr + "/" + i + "/" + lr;
        }

        protected int getReportingPeriod() {
            return this.perClientRunRows / 100;
        }

        void testSetup() throws IOException {
            this.table = CTable.openTable(new CloudataConf(), tableInfo.getTableName());
            //      this.table.setAutoFulsh(false);
            //      this.table.setBufferSize(12 * 1024 * 1024);
        }

        @SuppressWarnings("unused")
        void testTakedown() throws IOException {
            // Empty
            //this.table.flush();
        }

        /*
         * Run test
         * @return Elapsed time.
         * @throws IOException
         */
        long test() throws IOException {
            long elapsedTime;
            testSetup();
            long startTime = System.currentTimeMillis();
            try {
                int lastRow = this.startRow + this.perClientRunRows;
                // Report status every getReportingPeriod() rows.
                for (int i = this.startRow; i < lastRow; i++) {
                    testRow(i);
                    if (status != null && i > 0 && (i % getReportingPeriod()) == 0) {
                        status.setStatus(generateStatus(this.startRow, i, lastRow));
                    }
                }
                elapsedTime = System.currentTimeMillis() - startTime;
            } finally {
                testTakedown();
            }
            return elapsedTime;
        }

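        // Returns a random row key in [0, totalRows), zero-padded to ten digits
        // to match the keys written by the sequential tests.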
        String getRandomRow() {
            return df.format(rand.nextInt(Integer.MAX_VALUE) % this.totalRows);
        }

        /*
         * Test for individual row.
         * @param i Row index.
         */
        abstract void testRow(final int i) throws IOException;

        /*
         * @return Test name.
         */
        abstract String getTestName();
    }

    class RandomReadTest extends Test {
        RandomReadTest(final Configuration conf, final int startRow, final int perClientRunRows,
                final int totalRows, final Status status) {
            super(conf, startRow, perClientRunRows, totalRows, status);
        }

        @Override
        void testRow(@SuppressWarnings("unused") final int i) throws IOException {
            Row.Key rowKey = new Row.Key(getRandomRow());
            //      System.out.println("Get:" + rowKey);
            byte[] data = this.table.get(rowKey, COLUMN_NAME, new Cell.Key(COLUMN_KEY));
            if (data == null) {
                LOG.info("result is null");
            }
        }

        @Override
        protected int getReportingPeriod() {
            return this.perClientRunRows / 100;
        }

        @Override
        String getTestName() {
            return "randomRead";
        }
    }

    class RandomWriteTest extends Test {
        RandomWriteTest(final Configuration conf, final int startRow, final int perClientRunRows,
                final int totalRows, final Status status) {
            super(conf, startRow, perClientRunRows, totalRows, status);
        }

        @Override
        void testRow(@SuppressWarnings("unused") final int i) throws IOException {
            Row.Key rowKey = new Row.Key(getRandomRow());
            try {
                Row row = new Row(rowKey);
                row.addCell(COLUMN_NAME, new Cell(new Cell.Key(COLUMN_KEY), generateValue()));
                table.put(row);
            } catch (IOException e) {
                LOG.error("RandowWrite testRow error:" + e.getMessage(), e);
            }
        }

        @Override
        String getTestName() {
            return "randomWrite";
        }
    }

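    // Reads rows back through a TableScanner opened over COLUMN_NAME; each
    // testRow() call pulls the next cell rather than looking up row i directly.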
    class ScanTest extends Test {
        private TableScanner testScanner;

        int count = 0;

        ScanTest(final Configuration conf, final int startRow, final int perClientRunRows, final int totalRows,
                final Status status) {
            super(conf, startRow, perClientRunRows, totalRows, status);
        }

        @Override
        void testSetup() throws IOException {
            super.testSetup();
            this.testScanner = ScannerFactory.openScanner(table, COLUMN_NAME);
        }

        @Override
        void testTakedown() throws IOException {
            if (this.testScanner != null) {
                this.testScanner.close();
            }
            super.testTakedown();
        }

        @Override
        void testRow(@SuppressWarnings("unused") final int i) throws IOException {
            ScanCell columnValue = this.testScanner.next();
            if (columnValue == null) {
                System.out.println("Scan result is null");
            } else {
                //System.out.println(count + ">" + columnValue);
                count++;
            }
        }

        @Override
        String getTestName() {
            return "scan";
        }
    }

    class SequentialReadTest extends Test {
        SequentialReadTest(final Configuration conf, final int startRow, final int perClientRunRows,
                final int totalRows, final Status status) {
            super(conf, startRow, perClientRunRows, totalRows, status);
        }

        @Override
        void testRow(final int i) throws IOException {
            byte[] data = table.get(new Row.Key(df.format(i)), COLUMN_NAME, new Cell.Key(COLUMN_KEY));
            if (data == null) {
                System.out.println(df.format(i) + " result is null");
            }
        }

        @Override
        String getTestName() {
            return "sequentialRead";
        }
    }

    class BatchUploadTest extends Test {
        DirectUploader batchUploaders;

        BatchUploadTest(final Configuration conf, final int startRow, final int perClientRunRows,
                final int totalRows, final Status status) {
            super(conf, startRow, perClientRunRows, totalRows, status);
        }

        @Override
        void testRow(final int row) throws IOException {
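            // Open the DirectUploader lazily on the first row; this class does
            // not override testSetup(), so the uploader is created inside the
            // timed loop.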
            if (batchUploaders == null) {
                CTable ctable = CTable.openTable(cConf, tableInfo.getTableName());
                batchUploaders = ctable.openDirectUploader(ctable.getColumnsArray());
            }
            Row.Key rowKey = new Row.Key(df.format(row));
            Row rowData = new Row(rowKey);
            rowData.addCell(COLUMN_NAME, new Cell(new Cell.Key(COLUMN_KEY), generateValue()));
            batchUploaders.put(rowData);
        }

        @Override
        String getTestName() {
            return "batchUpload";
        }

        @Override
        void testTakedown() throws IOException {
            LOG.info("BatchUploader.close()");
            if (batchUploaders != null) {
                batchUploaders.close();
            }
        }
    }

    class SequentialWriteTest extends Test {
        //BatchUploader[] batchUploaders;
        SequentialWriteTest(final Configuration conf, final int startRow, final int perClientRunRows,
                final int totalRows, final Status status) {
            super(conf, startRow, perClientRunRows, totalRows, status);
        }

        @Override
        void testRow(final int row) throws IOException {
            try {
                Row.Key rowKey = new Row.Key(df.format(row));
                Row rowData = new Row(rowKey);
                rowData.addCell(COLUMN_NAME, new Cell(new Cell.Key(COLUMN_KEY), generateValue()));
                table.put(rowData);
            } catch (Exception e) {
                System.out.println("Insert Error:" + e.getMessage());
            }
        }

        @Override
        String getTestName() {
            return "sequentialWrite";
        }
    }

    long runOneClient(final String cmd, final int startRow, final int perClientRunRows, final int totalRows,
            final Status status) throws IOException {
        status.setStatus("Start " + cmd + " at offset " + startRow + " for " + perClientRunRows + " rows");
        long totalElapsedTime = 0;
        if (cmd.equals(RANDOM_READ)) {
            Test t = new RandomReadTest(this.conf, startRow, perClientRunRows, totalRows, status);
            totalElapsedTime = t.test();
        } else if (cmd.equals(RANDOM_READ_MEM)) {
            throw new UnsupportedOperationException("Not yet implemented");
        } else if (cmd.equals(RANDOM_WRITE)) {
            Test t = new RandomWriteTest(this.conf, startRow, perClientRunRows, totalRows, status);
            totalElapsedTime = t.test();
        } else if (cmd.equals(SCAN)) {
            Test t = new ScanTest(this.conf, startRow, perClientRunRows, totalRows, status);
            totalElapsedTime = t.test();
        } else if (cmd.equals(SEQUENTIAL_READ)) {
            Test t = new SequentialReadTest(this.conf, startRow, perClientRunRows, totalRows, status);
            totalElapsedTime = t.test();
        } else if (cmd.equals(SEQUENTIAL_WRITE)) {
            Test t = new SequentialWriteTest(this.conf, startRow, perClientRunRows, totalRows, status);
            totalElapsedTime = t.test();
        } else if (cmd.equals(BATCH_UPLOAD)) {
            Test t = new BatchUploadTest(this.conf, startRow, perClientRunRows, totalRows, status);
            totalElapsedTime = t.test();
        } else {
            throw new IllegalArgumentException("Invalid command value: " + cmd);
        }
        status.setStatus("Finished " + cmd + " in " + totalElapsedTime + "ms at offset " + startRow + " for "
                + perClientRunRows + " rows");
        System.out.println("Finished " + cmd + " in " + totalElapsedTime + "ms at offset " + startRow + " for "
                + perClientRunRows + " rows");
        return totalElapsedTime;
    }

    private void runNIsOne(final String cmd) throws IOException {
        Status status = new Status() {
            @SuppressWarnings("unused")
            public void setStatus(String msg) throws IOException {
                LOG.info(msg);
            }
        };

        try {
            if (cmd.equals(BATCH_UPLOAD)) {
                batchUpload = true;
            }

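            // Note whether the table exists before checkTable() creates it;
            // read and scan tests only preload data when the table is new.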
            boolean needInsert = true;
            if (CTable.existsTable(cConf, tableInfo.getTableName())) {
                needInsert = false;
            }
            checkTable();

            if (cmd.equals(RANDOM_READ) || cmd.equals(RANDOM_READ_MEM) || cmd.equals(SCAN)
                    || cmd.equals(SEQUENTIAL_READ)) {
                if (needInsert) {
                    status.setStatus(
                            "Running " + SEQUENTIAL_WRITE + " first so " + cmd + " has data to work against");
                    runOneClient(SEQUENTIAL_WRITE, 0, this.R, this.R, status);
                }
            }

            runOneClient(cmd, 0, this.R, this.R, status);
        } catch (Exception e) {
            LOG.error("Failed", e);
        } finally {
            //LOG.info("Deleting table " + tableInfo.getTableName());
            //NTable.dropTable(tableInfo.getTableName());
        }
    }

    private void runTest(final String cmd) throws IOException {
        if (cmd.equals(RANDOM_READ_MEM)) {
            // For this one test, make R small enough that everything fits in
            // memory (see p. 9 of the BigTable paper).
            R = (ONE_GB / 10) * N;
        }

        try {
            if (N == 1) {
                // A single client runs directly in this process, without a
                // MapReduce job.
                runNIsOne(cmd);
            } else {
                // Otherwise, fan the work out across a MapReduce job.
                runNIsMoreThanOne(cmd);
            }
        } finally {

        }
    }

    private void printUsage() {
        printUsage(null);
    }

    private void printUsage(final String message) {
        if (message != null && message.length() > 0) {
            System.err.println(message);
        }
        System.err.println("Usage: java " + this.getClass().getName() + "<command> <nclients>");
        System.err.println();
        System.err.println("Command:");
        System.err.println(" randomRead      Run random read test");
        System.err.println(" randomReadMem   Run random read test where table " + "is in memory");
        System.err.println(" randomWrite     Run random write test");
        System.err.println(" sequentialRead  Run sequential read test");
        System.err.println(" sequentialWrite Run sequential write test");
        System.err.println(" scan            Run scan test");
        System.err.println(" batchUpload     Run batchUpload test");
        System.err.println();
        System.err.println("Args:");
        System.err.println(" nclients        Integer. Required. Total number of client");
        System.err.println("                 running: 1 <= value <= 500");
        System.err.println("Examples:");
        System.err.println(" To run a single evaluation client:");
        System.err.println(" $ bin/cloudata jar cloudata-1.0-dev-test.jasr "
                + "org.cloudata.core.PerformanceTest sequentialWrite 1");
    }

    private void getArgs(final int start, final String[] args) {
        if (start + 1 > args.length) {
            throw new IllegalArgumentException("must supply the number of clients");
        }

        N = Integer.parseInt(args[start]);
        if (N > 500 || N < 1) {
            throw new IllegalArgumentException("Number of clients must be between " + "1 and 500.");
        }

        // Set total number of rows to write.
        R = ROWS_PER_GB * N;
    }

    public int doCommandLine(final String[] args) {
        // Process command-line args. TODO: Better cmd-line processing
        // (but hopefully something not as painful as cli options).    
        int errCode = -1;
        if (args.length < 1) {
            printUsage();
            return errCode;
        }

        try {
            for (int i = 0; i < args.length; i++) {
                String cmd = args[i];
                if (cmd.equals("-h") || cmd.startsWith("--h")) {
                    printUsage();
                    errCode = 0;
                    break;
                }

                //        final String masterArgKey = "--master=";
                //        if (cmd.startsWith(masterArgKey)) {
                //          this.conf.set(MASTER_ADDRESS, cmd.substring(masterArgKey.length()));
                //          continue;
                //        }
                //       
                //        final String miniClusterArgKey = "--miniCluster";
                //        if (cmd.startsWith(miniClusterArgKey)) {
                //          this.miniCluster = true;
                //          continue;
                //        }

                if (COMMANDS.contains(cmd)) {
                    getArgs(i + 1, args);
                    runTest(cmd);
                    errCode = 0;
                    break;
                }

                printUsage();
                break;
            }
        } catch (Exception e) {
            System.out.println(e.getMessage());
            //e.printStackTrace();
        }

        return errCode;
    }

    /**
     * @param args
     */
    public static void main(String[] args) throws IOException {
        System.exit(new PerformanceTest(new Configuration()).doCommandLine(args));
    }
}
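
Usage

The class is driven from the command line through main(), which forwards its arguments to doCommandLine(). Below is a minimal sketch of invoking the same entry point programmatically; the wrapper class name RunPerformanceTest is hypothetical, and it assumes the Cloudata and Hadoop jars plus a valid cluster configuration are on the classpath.

// Hypothetical driver: equivalent to running
//   bin/cloudata jar <test jar> org.cloudata.core.PerformanceTest sequentialWrite 1
import org.apache.hadoop.conf.Configuration;
import org.cloudata.core.PerformanceTest;

public class RunPerformanceTest {
    public static void main(String[] args) throws Exception {
        int exitCode = new PerformanceTest(new Configuration())
                .doCommandLine(new String[] { "sequentialWrite", "1" });
        System.exit(exitCode);
    }
}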