org.opencloudb.route.function.PartitionByMurmurHash.java Source code

Java tutorial

Introduction

Here is the source code for org.opencloudb.route.function.PartitionByMurmurHash.java

Source

/*
 * Copyright (c) 2013, OpenCloudDB/MyCAT and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software;Designed and Developed mainly by many Chinese 
 * opensource volunteers. you can redistribute it and/or modify it under the 
 * terms of the GNU General Public License version 2 only, as published by the
 * Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 * 
 * Any questions about this component can be directed to it's project Web address 
 * https://code.google.com/p/opencloudb/.
 *
 */
package org.opencloudb.route.function;

import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.SortedMap;
import java.util.TreeMap;

import org.opencloudb.config.model.rule.RuleAlgorithm;
import org.opencloudb.exception.MurmurHashException;

import com.google.common.hash.HashFunction;
import com.google.common.hash.Hashing;

/**
 * Consistent-hash partition rule built on Guava's 32-bit Murmur3 hash.
 * <p>
 * {@link #init()} places a number of virtual nodes for each of the {@code count}
 * buckets on a hash ring (a sorted map from hash value to bucket index).
 * {@link #calculate(String)} hashes the column value and returns the bucket that
 * owns the first virtual node at or after that hash, wrapping around to the
 * lowest node when the hash lies beyond the last one.
 * <p>
 * Configure the instance through the setters, then call {@link #init()} before
 * the first {@link #calculate(String)}.
 *
 * @author wuzhih
 */
public class PartitionByMurmurHash extends AbstractPartitionAlgorithm implements RuleAlgorithm {
    /** Virtual nodes created per unit of bucket weight unless overridden. */
    private static final int DEFAULT_VIRTUAL_BUCKET_TIMES = 160;
    /** Weight used for buckets without an explicit (positive) weight. */
    private static final int DEFAULT_WEIGHT = 1;
    /** Charset used to read the weight properties file. */
    private static final Charset DEFAULT_CHARSET = Charset.forName("UTF-8");

    private int seed;
    private int count;
    private int virtualBucketTimes = DEFAULT_VIRTUAL_BUCKET_TIMES;
    /**
     * Bucket index -> weight. Kept after {@link #init()} so that init() can be
     * called again and weights can still be loaded afterwards.
     */
    private final Map<Integer, Integer> weightMap = new HashMap<>();

    private HashFunction hash;

    /** The hash ring: virtual-node hash -> bucket index. Replaced as a whole by init(). */
    private TreeMap<Integer, Integer> bucketMap;

    /**
     * Builds the hash function and the hash ring from the current seed, count,
     * virtualBucketTimes and weights. May be called again after the
     * configuration has changed.
     *
     * @throws MurmurHashException wrapping any exception raised while building the ring
     */
    @Override
    public void init() {
        try {
            HashFunction function = Hashing.murmur3_32(seed);
            // Fill a local map first so the field never refers to a half-built ring.
            TreeMap<Integer, Integer> ring = generateBucketMap(function);
            hash = function;
            bucketMap = ring;
        } catch (Exception e) {
            throw new MurmurHashException(e);
        }
    }

    /**
     * Creates the ring: bucket {@code i} contributes
     * {@code virtualBucketTimes * getWeight(i)} virtual nodes.
     *
     * @param function hash function used to position the virtual nodes
     * @return a new map from virtual-node hash to bucket index
     */
    private TreeMap<Integer, Integer> generateBucketMap(HashFunction function) {
        TreeMap<Integer, Integer> ring = new TreeMap<>();
        for (int i = 0; i < count; i++) {
            // NOTE(review): the builder is never reset, so the hashed name grows on
            // every iteration ("SHARD-0-NODE-0", "SHARD-0-NODE-0-NODE-1", ...). This
            // looks unintentional, but it determines every ring position; changing it
            // would send existing keys to different buckets, so it is kept as is.
            StringBuilder hashName = new StringBuilder("SHARD-").append(i);
            for (int n = 0, shard = virtualBucketTimes * getWeight(i); n < shard; n++) {
                // If two names hash to the same value the later put wins.
                ring.put(function.hashUnencodedChars(hashName.append("-NODE-").append(n)).asInt(), i);
            }
        }
        return ring;
    }

    /**
     * Returns the weight of a bucket.
     *
     * @param bucket zero-based bucket index
     * @return the configured weight, or {@code DEFAULT_WEIGHT} when none was loaded
     */
    private int getWeight(int bucket) {
        Integer w = weightMap.get(bucket);
        return w == null ? DEFAULT_WEIGHT : w;
    }

    /**
     * Sets the seed handed to the Murmur3 hash function. The field is 0 when
     * this setter is never called.
     *
     * @param seed hash seed, applied on the next {@link #init()}
     */
    public void setSeed(int seed) {
        this.seed = seed;
    }

    /**
     * Sets the number of buckets; {@link #calculate(String)} returns indexes in
     * {@code [0, count)}.
     *
     * @param count number of buckets, applied on the next {@link #init()}
     */
    public void setCount(int count) {
        this.count = count;
    }

    /**
     * Sets how many virtual nodes each bucket gets per unit of weight
     * (default {@code DEFAULT_VIRTUAL_BUCKET_TIMES}).
     *
     * @param virtualBucketTimes virtual-node multiplier, applied on the next {@link #init()}
     */
    public void setVirtualBucketTimes(int virtualBucketTimes) {
        this.virtualBucketTimes = virtualBucketTimes;
    }

    /**
     * Loads bucket weights from a properties file found through this class's
     * class loader. Each key is a bucket index and each value its weight; a
     * weight that is not positive is replaced by {@code DEFAULT_WEIGHT}.
     * Buckets that do not appear in the file keep the default weight.
     *
     * @param weightMapPath class-path resource name of the properties file
     * @throws FileNotFoundException if the resource cannot be found
     * @throws IOException if the resource cannot be read
     * @throws NumberFormatException if a key or value is not an integer
     */
    public void setWeightMapFile(String weightMapPath) throws IOException {
        InputStream resource = this.getClass().getClassLoader().getResourceAsStream(weightMapPath);
        if (resource == null) {
            // getResourceAsStream signals "not found" with null; report it as an I/O
            // problem instead of letting the reader constructor fail on null.
            throw new FileNotFoundException("weight map file not found on classpath: " + weightMapPath);
        }
        Properties props = new Properties();
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(resource, DEFAULT_CHARSET))) {
            props.load(reader);
        }
        for (Map.Entry<Object, Object> entry : props.entrySet()) {
            // Properties.load keeps trailing whitespace in values, so trim before parsing.
            int weight = Integer.parseInt(entry.getValue().toString().trim());
            int bucket = Integer.parseInt(entry.getKey().toString().trim());
            weightMap.put(bucket, weight > 0 ? weight : DEFAULT_WEIGHT);
        }
    }

    /**
     * Maps a column value to a bucket index.
     *
     * @param columnValue value of the partition column
     * @return index of the bucket owning the first virtual node whose hash is
     *         greater than or equal to the value's hash, wrapping to the lowest
     *         node when there is none
     * @throws IllegalStateException if the ring has not been built or is empty
     */
    @Override
    public Integer calculate(String columnValue) {
        TreeMap<Integer, Integer> ring = bucketMap;
        if (ring == null || ring.isEmpty()) {
            throw new IllegalStateException(
                    "hash ring is empty: call init() with count > 0 and virtualBucketTimes > 0 before calculate()");
        }
        Map.Entry<Integer, Integer> node = ring.ceilingEntry(hash.hashUnencodedChars(columnValue).asInt());
        if (node == null) {
            // Past the highest virtual node: wrap around to the start of the ring.
            node = ring.firstEntry();
        }
        return node.getValue();
    }

    /**
     * Manual distribution check: routes ten million consecutive integer keys to
     * 10 buckets, prints the per-bucket counts and ratios, then round-trips the
     * ring through {@link Properties} and prints it.
     */
    private static void hashTest() throws IOException {
        PartitionByMurmurHash hash = new PartitionByMurmurHash();
        hash.count = 10; // number of buckets
        hash.init();

        int[] bucket = new int[hash.count];

        int total = 10_000_000; // number of keys routed
        int firstKey = 1_000_000; // keys start at one million
        int c = 0;
        for (int i = firstKey; i < total + firstKey; i++) {
            c++;
            bucket[hash.calculate(Integer.toString(i))]++;
        }
        System.out.println(c + "   " + total);
        double d = 0;
        c = 0;
        int idx = 0;
        System.out.println("index    bucket   ratio");
        for (int i : bucket) {
            d += i / (double) total;
            c += i;
            System.out.println(idx++ + "  " + i + "   " + (i / (double) total));
        }
        System.out.println(d + "  " + c);

        Properties props = new Properties();
        for (Map.Entry<Integer, Integer> entry : hash.bucketMap.entrySet()) {
            props.setProperty(entry.getKey().toString(), entry.getValue().toString());
        }
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        props.store(out, null);

        props.clear();
        props.load(new ByteArrayInputStream(out.toByteArray()));
        System.out.println(props);
        System.out.println("****************************************************");
    }

    /**
     * Manual check that routes the given keys with a 12-bucket ring and prints
     * the per-bucket counts and ratios. Not invoked by {@link #main(String[])}.
     *
     * @param partition keys to route
     */
    private static void rehashTest(List<Integer> partition) {
        PartitionByMurmurHash hash = new PartitionByMurmurHash();
        hash.count = 12; // number of buckets
        hash.init();

        int[] bucket = new int[hash.count];

        int total = partition.size(); // number of keys routed
        int c = 0;
        for (int i : partition) {
            c++;
            bucket[hash.calculate(Integer.toString(i))]++;
        }
        System.out.println(c + "   " + total);
        c = 0;
        int idx = 0;
        System.out.println("index    bucket   ratio");
        for (int i : bucket) {
            c += i;
            System.out.println(idx++ + "  " + i + "   " + (i / (double) total));
        }
    }

    /** Runs the manual distribution check. */
    public static void main(String[] args) throws IOException {
        hashTest();
    }
}