org.apache.kylin.storage.hbase.util.HbaseStreamingInput.java Source code

Introduction

Here is the source code for org.apache.kylin.storage.hbase.util.HbaseStreamingInput.java, a small test utility from Apache Kylin that streams randomly generated cells into an HBase table and runs periodic random scans against it.

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.kylin.storage.hbase.util;

import java.io.IOException;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.Arrays;
import java.util.Calendar;
import java.util.List;
import java.util.Random;
import java.util.concurrent.Semaphore;

import org.apache.commons.io.IOUtils;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.regionserver.DisabledRegionSplitPolicy;
import org.apache.kylin.common.KylinConfig;
import org.apache.kylin.common.util.Bytes;
import org.apache.kylin.storage.hbase.HBaseConnection;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.google.common.collect.Lists;

/**
 * Load/scan test tool for HBase: in one mode it continuously writes batches of random
 * 128 KB cells into a table, and in another it issues random time-range scans against
 * the same table and verifies that the cells read back have the expected size.
 */
public class HbaseStreamingInput {
    private static final Logger logger = LoggerFactory.getLogger(HbaseStreamingInput.class);

    private static final int CELL_SIZE = 128 * 1024; // 128 KB
    private static final byte[] CF = "F".getBytes();
    private static final byte[] QN = "C".getBytes();

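    /**
     * Creates the target HTable if it does not already exist: a single column family "F"
     * whose block size equals the test cell size, with region splitting disabled and a
     * 512 MB memstore flush size.
     */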
    public static void createTable(String tableName) throws IOException {
        Connection conn = getConnection();
        Admin hadmin = conn.getAdmin();

        try {
            boolean tableExist = hadmin.tableExists(TableName.valueOf(tableName));
            if (tableExist) {
                logger.info("HTable '" + tableName + "' already exists");
                return;
            }

            logger.info("Creating HTable '" + tableName + "'");
            HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(tableName));
            desc.setValue(HTableDescriptor.SPLIT_POLICY, DisabledRegionSplitPolicy.class.getName());//disable region split
            desc.setMemStoreFlushSize(512 << 20);//512M

            HColumnDescriptor fd = new HColumnDescriptor(CF);
            fd.setBlocksize(CELL_SIZE);
            desc.addFamily(fd);
            hadmin.createTable(desc);

            logger.info("HTable '" + tableName + "' created");
        } finally {
            // close the Admin before the Connection that created it
            IOUtils.closeQuietly(hadmin);
            IOUtils.closeQuietly(conn);
        }
    }

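    /**
     * Releases one permit on the given semaphore every <code>interval</code> milliseconds,
     * pacing the endless write/scan loops that block on acquire().
     */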
    private static void scheduleJob(Semaphore semaphore, int interval) {
        while (true) {
            semaphore.release();
            try {
                Thread.sleep(interval);
            } catch (InterruptedException e) {
                e.printStackTrace();
            }
        }
    }

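    /**
     * Endless write loop: every 5 minutes inserts a batch of 1024 rows, each keyed by
     * (current time in millis, sequence id) and carrying a single random 128 KB cell.
     */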
    public static void addData(String tableName) throws IOException {

        createTable(tableName);

        final Semaphore semaphore = new Semaphore(0);
        new Thread(new Runnable() {
            @Override
            public void run() {
                scheduleJob(semaphore, 300000);//5 minutes a batch
            }
        }).start();

        while (true) {
            try {
                semaphore.acquire();
                int waiting = semaphore.availablePermits();
                if (waiting > 0) {
                    logger.warn("There are another " + waiting + " batches waiting to be added");
                }
            } catch (InterruptedException e) {
                Thread.currentThread().interrupt();
                e.printStackTrace();
            }

            Connection conn = getConnection();
            Table table = conn.getTable(TableName.valueOf(tableName));

            byte[] key = new byte[8 + 4];//time + id

            logger.info("============================================");
            long startTime = System.currentTimeMillis();
            logger.info("data load start time in millis: " + startTime);
            logger.info("data load start at " + formatTime(startTime));
            List<Put> buffer = Lists.newArrayList();
            for (int i = 0; i < (1 << 10); ++i) {
                long time = System.currentTimeMillis();
                Bytes.putLong(key, 0, time);
                Bytes.putInt(key, 8, i);
                Put put = new Put(key);
                byte[] cell = randomBytes(CELL_SIZE);
                put.addColumn(CF, QN, cell);
                buffer.add(put);
            }
            table.put(buffer);
            table.close();
            conn.close();
            long endTime = System.currentTimeMillis();
            logger.info("data load end at " + formatTime(endTime));
            logger.info("data load time consumed: " + (endTime - startTime));
            logger.info("============================================");
        }
    }

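    /**
     * Endless read loop: once a minute picks five random 10-minute time windows between the
     * first row's timestamp and now, scans each window, verifies that every cell has the
     * expected size, and logs the row count plus a throwaway hash of the values read.
     */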
    public static void randomScan(String tableName) throws IOException {

        final Semaphore semaphore = new Semaphore(0);
        new Thread(new Runnable() {
            @Override
            public void run() {
                scheduleJob(semaphore, 60000);// one batch per minute
            }
        }).start();

        while (true) {
            try {
                semaphore.acquire();
                int waiting = semaphore.drainPermits();
                if (waiting > 0) {
                    logger.warn("Too many queries to handle! Blocking " + waiting + " sets of scan requests");
                }
            } catch (InterruptedException e) {
                Thread.currentThread().interrupt();
                e.printStackTrace();
            }

            Random r = new Random();
            Connection conn = getConnection();
            Table table = conn.getTable(TableName.valueOf(tableName));

            long leftBound = getFirstKeyTime(table);
            long rightBound = System.currentTimeMillis();

            for (int t = 0; t < 5; ++t) {
                long start = (long) (leftBound + r.nextDouble() * (rightBound - leftBound));
                long end = start + 600000;//a period of 10 minutes
                logger.info("A scan from " + formatTime(start) + " to " + formatTime(end));

                Scan scan = new Scan();
                scan.setStartRow(Bytes.toBytes(start));
                scan.setStopRow(Bytes.toBytes(end));
                scan.addFamily(CF);
                ResultScanner scanner = table.getScanner(scan);
                long hash = 0;
                int rowCount = 0;
                for (Result result : scanner) {
                    Cell cell = result.getColumnLatestCell(CF, QN);
                    byte[] value = cell.getValueArray();
                    if (cell.getValueLength() != CELL_SIZE) {
                        logger.error("value size invalid!!!!!");
                    }

                    hash += Arrays.hashCode(Arrays.copyOfRange(value, cell.getValueOffset(),
                            cell.getValueLength() + cell.getValueOffset()));
                    rowCount++;
                }
                scanner.close();
                logger.info("Scanned " + rowCount + " rows, the (meaningless) hash for the scan is " + hash);
            }
            table.close();
            conn.close();
        }
    }

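    /**
     * Returns the timestamp encoded in the first 8 bytes of the first row key found in the
     * table, or 0 if the table is empty.
     */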
    private static long getFirstKeyTime(Table table) throws IOException {
        long startTime = 0;

        Scan scan = new Scan();
        scan.addFamily(CF);
        ResultScanner scanner = table.getScanner(scan);
        for (Result result : scanner) {
            Cell cell = result.getColumnLatestCell(CF, QN);
            byte[] key = cell.getRowArray();
            startTime = Bytes.toLong(key, cell.getRowOffset(), 8);
            logger.info("Retrieved first record time: " + formatTime(startTime));
            break;//only get first one
        }
        scanner.close();
        return startTime;

    }

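    /** Obtains the HBase connection configured by the Kylin storage URL. */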
    private static Connection getConnection() throws IOException {
        return HBaseConnection.get(KylinConfig.getInstanceFromEnv().getStorageUrl());
    }

    private static String formatTime(long time) {
        DateFormat dateFormat = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss");
        Calendar cal = Calendar.getInstance();
        cal.setTimeInMillis(time);
        return dateFormat.format(cal.getTime());
    }

    private static byte[] randomBytes(int length) {
        byte[] bytes = new byte[length];
        Random rand = new Random();
        rand.nextBytes(bytes);
        return bytes;
    }

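    /**
     * Command-line entry point: args[0] selects the action ("createtable", "adddata" or
     * "randomscan"), args[1] is the name of the HBase table to operate on.
     */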
    public static void main(String[] args) throws Exception {

        if (args[0].equalsIgnoreCase("createtable")) {
            createTable(args[1]);
        } else if (args[0].equalsIgnoreCase("adddata")) {
            addData(args[1]);
        } else if (args[0].equalsIgnoreCase("randomscan")) {
            randomScan(args[1]);
        }
    }

}
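
Example usage

A minimal sketch of driving the tool from Java rather than from the command line. The table name "kylin_streaming_test" and the demo class name are assumptions for illustration; running it requires a reachable HBase cluster and a valid Kylin configuration on the classpath.

import org.apache.kylin.storage.hbase.util.HbaseStreamingInput;

public class HbaseStreamingInputDemo {

    public static void main(String[] args) throws Exception {
        String table = "kylin_streaming_test"; // hypothetical table name, adjust to your environment

        // create the test table if it does not exist yet
        HbaseStreamingInput.createTable(table);

        // uncomment one of the endless loops below:
        // HbaseStreamingInput.addData(table);    // write a batch of random cells every 5 minutes
        // HbaseStreamingInput.randomScan(table); // run random time-range scans once a minute
    }
}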