org.apache.kylin.dict.CachedTreeMap.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.kylin.dict.CachedTreeMap.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
*/

package org.apache.kylin.dict;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.AbstractCollection;
import java.util.Collection;
import java.util.Iterator;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.concurrent.ExecutionException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.google.common.cache.CacheBuilder;
import com.google.common.cache.CacheLoader;
import com.google.common.cache.LoadingCache;
import com.google.common.cache.RemovalListener;
import com.google.common.cache.RemovalNotification;

/**
 * Created by sunyerui on 16/5/2.
 * TODO Depends on HDFS for now, ideally just depends on storage interface
 *
 * A {@link TreeMap} that keeps only its keys in the tree (every entry is stored with a
 * {@code null} value) while the real values live in a Guava {@link LoadingCache} and in
 * per-key files named with {@link #CACHED_PREFIX} on a Hadoop {@link FileSystem}.
 * Data is organised under a base directory in sub-directories named with
 * {@link #VERSION_PREFIX}; a mutable map works on a copy in a "working" directory.
 */
public class CachedTreeMap<K extends WritableComparable, V extends Writable> extends TreeMap<K, V>
        implements Writable {
    private static final Logger logger = LoggerFactory.getLogger(CachedTreeMap.class);

    // Key class, instantiated reflectively when keys are deserialized in readFields().
    private final Class<K> keyClazz;
    // Value class, instantiated reflectively when a value file is read.
    private final Class<V> valueClazz;
    // Lazily created view returned by values(); reset to null by clear().
    transient volatile Collection<V> values;
    // Holds the values; missing entries are loaded from the per-key file.
    private final LoadingCache<K, V> valueCache;
    // Hadoop configuration used for file system access and copies.
    private final Configuration conf;
    // Root directory containing the version directories and the working directory.
    private final Path baseDir;
    // Latest version directory found (or created) when this map was constructed.
    private final Path versionDir;
    // "working" directory under baseDir; used as the current directory of a mutable map.
    private final Path workingDir;
    // File system resolved from baseDir.
    private final FileSystem fs;
    // When true, values are read from versionDir and value files are never written or deleted.
    private final boolean immutable;
    // commit() never deletes the newest maxVersions version directories.
    private final int maxVersions;
    // Age in milliseconds after which an old version directory may be deleted by commit().
    private final long versionTTL;
    // Set to true on construction and updated by commit(); mutating methods assert it is true.
    private boolean keepAppend;

    // Buffer size in bytes used when opening and creating value files.
    public static final int BUFFER_SIZE = 8 * 1024 * 1024;

    // File name prefix of a per-key value file.
    public static final String CACHED_PREFIX = "cached_";
    // Directory name prefix of a committed version; the suffix is a millisecond timestamp.
    public static final String VERSION_PREFIX = "version_";

    /**
     * Fluent builder for {@link CachedTreeMap}.
     *
     * <p>{@code baseDir}, {@code keyClazz} and {@code valueClazz} are mandatory; the cache size
     * defaults to 8 entries and the remaining options default to {@code false} / {@code 0}.
     *
     * @param <K> key type of the map being built
     * @param <V> value type of the map being built
     */
    public static class CachedTreeMapBuilder<K, V> {
        private Class<K> keyClazz;
        private Class<V> valueClazz;
        private int maxCount = 8;
        private String baseDir;
        private boolean immutable;
        private int maxVersions;
        private long versionTTL;

        /**
         * Creates an empty builder.
         *
         * <p>The raw return type is kept deliberately: existing callers chain
         * {@code newBuilder().keyClazz(X.class)...} and rely on it to compile.
         */
        public static CachedTreeMapBuilder newBuilder() {
            return new CachedTreeMapBuilder();
        }

        private CachedTreeMapBuilder() {
        }

        /** Sets the key class used to deserialize keys. */
        public CachedTreeMapBuilder<K, V> keyClazz(Class<K> clazz) {
            this.keyClazz = clazz;
            return this;
        }

        /** Sets the value class used to deserialize values. */
        public CachedTreeMapBuilder<K, V> valueClazz(Class<V> clazz) {
            this.valueClazz = clazz;
            return this;
        }

        /** Sets the maximum number of values kept in the cache of a mutable map. */
        public CachedTreeMapBuilder<K, V> maxSize(int maxCount) {
            this.maxCount = maxCount;
            return this;
        }

        /** Sets the directory under which all data of the map is stored. */
        public CachedTreeMapBuilder<K, V> baseDir(String baseDir) {
            this.baseDir = baseDir;
            return this;
        }

        /** Chooses between an immutable (read-only) and a mutable map. */
        public CachedTreeMapBuilder<K, V> immutable(boolean immutable) {
            this.immutable = immutable;
            return this;
        }

        /** Sets how many of the newest versions are always retained on commit. */
        public CachedTreeMapBuilder<K, V> maxVersions(int maxVersions) {
            this.maxVersions = maxVersions;
            return this;
        }

        /** Sets the age, in milliseconds, after which an old version may be deleted on commit. */
        public CachedTreeMapBuilder<K, V> versionTTL(long versionTTL) {
            this.versionTTL = versionTTL;
            return this;
        }

        /**
         * Builds the map from the collected settings.
         *
         * @return the new map (raw type, as the builder's type parameters are unbounded)
         * @throws RuntimeException if the base directory, key class or value class is missing
         * @throws IOException if the underlying file system cannot be prepared
         */
        public CachedTreeMap build() throws IOException {
            if (baseDir == null) {
                throw new RuntimeException("CachedTreeMap need a baseDir to cache data");
            }
            if (keyClazz == null || valueClazz == null) {
                throw new RuntimeException("CachedTreeMap need key and value clazz to serialize data");
            }
            return new CachedTreeMap(maxCount, keyClazz, valueClazz, baseDir, immutable, maxVersions, versionTTL);
        }
    }

    /**
     * Prepares the directories on the file system and wires up the value cache.
     *
     * @param maxCount maximum number of cached values; only applied when the map is mutable
     * @param keyClazz class used to instantiate keys
     * @param valueClazz class used to instantiate values
     * @param basePath base directory; a single trailing "/" is stripped
     * @param immutable whether the map is read-only
     * @param maxVersions number of newest versions always retained by commit
     * @param versionTTL age in milliseconds after which an old version may be deleted by commit
     * @throws IOException if a file system operation fails
     */
    private CachedTreeMap(int maxCount, Class<K> keyClazz, Class<V> valueClazz, String basePath, boolean immutable,
            int maxVersions, long versionTTL) throws IOException {
        super();
        this.keyClazz = keyClazz;
        this.valueClazz = valueClazz;
        this.immutable = immutable;
        this.keepAppend = true;
        this.maxVersions = maxVersions;
        this.versionTTL = versionTTL;
        this.conf = new Configuration();
        if (basePath.endsWith("/")) {
            basePath = basePath.substring(0, basePath.length() - 1);
        }
        this.baseDir = new Path(basePath);
        this.fs = FileSystem.get(baseDir.toUri(), conf);
        if (!fs.exists(baseDir)) {
            fs.mkdirs(baseDir);
        }
        this.versionDir = getLatestVersion(conf, fs, baseDir);
        this.workingDir = new Path(baseDir, "working");
        if (!this.immutable) {
            // A mutable map works on a fresh copy of the latest version in the working dir,
            // so that a sudden crash cannot corrupt the committed version.
            if (fs.exists(workingDir)) {
                fs.delete(workingDir, true);
            }
            FileUtil.copy(fs, versionDir, fs, workingDir, false, true, conf);
        }
        CacheBuilder builder = CacheBuilder.newBuilder().removalListener(new RemovalListener<K, V>() {
            @Override
            public void onRemoval(RemovalNotification<K, V> notification) {
                logger.info(String.format("Evict cache key %s(%d) with value %s caused by %s, size %d/%d ",
                        notification.getKey(), notification.getKey().hashCode(), notification.getValue(),
                        notification.getCause(), size(), valueCache.size()));
                switch (notification.getCause()) {
                case SIZE:
                    // Evicted because the cache is full: persist the value to its file.
                    writeValue(notification.getKey(), notification.getValue());
                    break;
                case EXPLICIT:
                    // Invalidated by remove()/clear(): drop the value file as well.
                    deleteValue(notification.getKey());
                    break;
                default:
                    // Other removal causes need no file system action.
                }
            }
        });
        if (this.immutable) {
            // Immutable map: no size limit; values are held through soft references so the
            // garbage collector can reclaim them when memory is needed.
            builder.softValues();
        } else {
            builder.maximumSize(maxCount);
        }
        this.valueCache = builder.build(new CacheLoader<K, V>() {
            @Override
            public V load(K key) throws Exception {
                // Cache miss: read the value from its file.
                V value = readValue(key);
                logger.info(String.format("Load cache by key %s(%d) with value %s", key, key.hashCode(), value));
                return value;
            }
        });
    }

    /**
     * Builds the path of the file that stores the value of the given key: the current
     * directory, a "/" and the cached-file prefix followed by the key's string form.
     */
    private String generateFileName(K key) {
        StringBuilder name = new StringBuilder();
        name.append(getCurrentDir()).append("/").append(CACHED_PREFIX).append(key.toString());
        return name.toString();
    }

    /**
     * Returns the directory values are read from and written to: the version directory
     * for an immutable map, the working directory otherwise.
     */
    private String getCurrentDir() {
        Path current = immutable ? versionDir : workingDir;
        return current.toString();
    }

    /**
     * Lists the entries directly under {@code baseDir} whose name starts with the version
     * prefix and returns their full paths as strings in ascending (string) order.
     *
     * @throws IOException if the directory cannot be listed
     */
    private static String[] listAllVersions(FileSystem fs, Path baseDir) throws IOException {
        PathFilter versionsOnly = new PathFilter() {
            @Override
            public boolean accept(Path candidate) {
                return candidate.getName().startsWith(VERSION_PREFIX);
            }
        };
        // A TreeSet both de-duplicates and sorts the path strings.
        TreeSet<String> sorted = new TreeSet<>();
        for (FileStatus entry : fs.listStatus(baseDir, versionsOnly)) {
            sorted.add(entry.getPath().toString());
        }
        return sorted.toArray(new String[sorted.size()]);
    }

    // only for test
    /**
     * Returns the path component (without scheme or authority) of the latest version
     * directory under the base directory.
     */
    public String getLatestVersion() throws IOException {
        Path latest = getLatestVersion(conf, fs, baseDir);
        return latest.toUri().getPath();
    }

    /**
     * Returns the newest version directory under {@code baseDir}.
     *
     * <p>If no version directory exists yet, the base directory is treated as old-format data:
     * a new version directory is created and, when both an {@code .index} file and at least one
     * cached value file are present directly under {@code baseDir}, they are moved into it
     * (copied to a temporary directory, renamed into place, then the originals are deleted).
     *
     * @param conf Hadoop configuration used for the copies
     * @param fs file system holding {@code baseDir}
     * @param baseDir root directory of the map
     * @return the latest (possibly freshly created) version directory
     * @throws IOException if a file system operation fails, including a rename that reports
     *         failure; in that case the old-format files are left untouched
     */
    private static Path getLatestVersion(Configuration conf, FileSystem fs, Path baseDir) throws IOException {
        String[] versions = listAllVersions(fs, baseDir);
        if (versions.length > 0) {
            return new Path(versions[versions.length - 1]);
        }

        // Old format, directly use base dir, convert to new format
        Path newVersionDir = new Path(baseDir, VERSION_PREFIX + System.currentTimeMillis());
        Path tmpNewVersionDir = new Path(baseDir, "tmp_" + VERSION_PREFIX + System.currentTimeMillis());
        Path indexFile = new Path(baseDir, ".index");
        try {
            FileStatus[] cachedFiles = fs.listStatus(baseDir, new PathFilter() {
                @Override
                public boolean accept(Path path) {
                    return path.getName().startsWith(CACHED_PREFIX);
                }
            });
            fs.mkdirs(tmpNewVersionDir);
            boolean hasOldData = fs.exists(indexFile) && cachedFiles.length > 0;
            if (hasOldData) {
                FileUtil.copy(fs, indexFile, fs, tmpNewVersionDir, false, true, conf);
                for (FileStatus file : cachedFiles) {
                    FileUtil.copy(fs, file.getPath(), fs, tmpNewVersionDir, false, true, conf);
                }
            }
            // rename() may signal failure through its return value instead of an exception.
            // Stop here in that case: deleting the originals below would otherwise lose the
            // only remaining copy once the temporary directory is cleaned up.
            if (!fs.rename(tmpNewVersionDir, newVersionDir)) {
                throw new IOException("Failed to rename " + tmpNewVersionDir + " to " + newVersionDir);
            }
            if (hasOldData) {
                fs.delete(indexFile, true);
                for (FileStatus file : cachedFiles) {
                    fs.delete(file.getPath(), true);
                }
            }
        } finally {
            // The temporary directory only still exists if the conversion did not complete.
            if (fs.exists(tmpNewVersionDir)) {
                fs.delete(tmpNewVersionDir, true);
            }
        }
        return newVersionDir;
    }

    /**
     * Publishes the working directory as a new version directory and prunes old versions.
     *
     * <p>When {@code keepAppend} is true the working directory is copied (via a temporary
     * directory that is renamed into place, so the new version only becomes visible once
     * complete) and stays usable; otherwise the working directory itself is renamed to the
     * new version. Afterwards, version directories other than the newest {@code maxVersions}
     * are deleted if their timestamp is older than {@code versionTTL} milliseconds.
     *
     * @param keepAppend whether the map should remain appendable after this commit
     * @throws IOException if a file system operation fails, including a rename that reports
     *         failure through its return value
     */
    public void commit(boolean keepAppend) throws IOException {
        assert this.keepAppend
                && !immutable : "Only support commit method with immutable false and keepAppend true";

        Path newVersionDir = new Path(baseDir, VERSION_PREFIX + System.currentTimeMillis());
        if (keepAppend) {
            // Copy to tmp dir, and rename to new version, make sure it's complete when be visible
            Path tmpNewVersionDir = new Path(baseDir, "tmp_" + VERSION_PREFIX + System.currentTimeMillis());
            try {
                FileUtil.copy(fs, workingDir, fs, tmpNewVersionDir, false, true, conf);
                if (!fs.rename(tmpNewVersionDir, newVersionDir)) {
                    throw new IOException("Failed to rename " + tmpNewVersionDir + " to " + newVersionDir);
                }
            } finally {
                if (fs.exists(tmpNewVersionDir)) {
                    fs.delete(tmpNewVersionDir, true);
                }
            }
        } else {
            // A silently failed rename would leave the commit unpublished without any signal.
            if (!fs.rename(workingDir, newVersionDir)) {
                throw new IOException("Failed to rename " + workingDir + " to " + newVersionDir);
            }
        }
        this.keepAppend = keepAppend;

        // Check versions count, delete expired versions
        String[] versions = listAllVersions(fs, baseDir);
        long timestamp = System.currentTimeMillis();
        for (int i = 0; i < versions.length - maxVersions; i++) {
            // The directory name suffix after the version prefix is its creation timestamp.
            String versionString = versions[i]
                    .substring(versions[i].lastIndexOf(VERSION_PREFIX) + VERSION_PREFIX.length());
            long version = Long.parseLong(versionString);
            if (version + versionTTL < timestamp) {
                fs.delete(new Path(versions[i]), true);
            }
        }
    }

    /**
     * Registers every key of {@code other} in this map's tree with a {@code null} value;
     * the value cache is not touched.
     */
    public void loadEntry(CachedTreeMap other) {
        Iterator keys = other.keySet().iterator();
        while (keys.hasNext()) {
            super.put((K) keys.next(), null);
        }
    }

    /**
     * Serializes {@code value} into the file belonging to {@code key}, overwriting any
     * existing file. Does nothing for an immutable map.
     *
     * @throws RuntimeException wrapping the original failure if the value cannot be written
     */
    private void writeValue(K key, V value) {
        if (immutable) {
            return;
        }
        String fileName = generateFileName(key);
        Path filePath = new Path(fileName);
        try (FSDataOutputStream out = fs.create(filePath, true, BUFFER_SIZE, (short) 5, BUFFER_SIZE * 8L)) {
            value.write(out);
        } catch (Exception e) {
            logger.error(String.format("write value into %s exception: %s", fileName, e), e);
            // Wrap the exception itself: e.getCause() is usually null for a directly thrown
            // I/O error, which would discard the failure's type, message and stack trace.
            throw new RuntimeException(e);
        }
    }

    /**
     * Reads the value of {@code key} from its file into a fresh instance of the value class.
     *
     * @return the deserialized value, never {@code null}
     * @throws Exception if the file cannot be opened or the value cannot be instantiated or
     *         deserialized; the failure is logged and then propagated so that the cache
     *         loader reports a load failure instead of returning {@code null}, which Guava's
     *         {@code CacheLoader} contract does not allow
     */
    private V readValue(K key) throws Exception {
        String fileName = generateFileName(key);
        Path filePath = new Path(fileName);
        try (FSDataInputStream input = fs.open(filePath, BUFFER_SIZE)) {
            V value = valueClazz.newInstance();
            value.readFields(input);
            return value;
        } catch (Exception e) {
            logger.error(String.format("read value from %s exception: %s", fileName, e), e);
            throw e;
        }
    }

    /**
     * Deletes the file holding the value of {@code key}, if it exists. Skipped for an
     * immutable map; file system failures are logged and otherwise ignored.
     */
    private void deleteValue(K key) {
        if (!immutable) {
            String fileName = generateFileName(key);
            Path target = new Path(fileName);
            try {
                boolean present = fs.exists(target);
                if (present) {
                    fs.delete(target, true);
                }
            } catch (Exception e) {
                logger.error(String.format("delete value file %s exception: %s", fileName, e), e);
            }
        }
    }

    /**
     * Records {@code key} in the tree (with a {@code null} placeholder) and stores
     * {@code value} in the value cache.
     *
     * @return always {@code null}; the previous value is not looked up
     */
    @Override
    public V put(K key, V value) {
        assert !(immutable || !keepAppend) : "Only support put method with immutable false and keepAppend true";
        // The tree tracks keys only; the value itself goes to the cache.
        super.put(key, null);
        valueCache.put(key, value);
        return null;
    }

    /**
     * Returns the value for {@code key} from the value cache when the key is present in the
     * tree.
     *
     * @return the value, or {@code null} if the key is unknown or the cache reports an
     *         {@link ExecutionException} (which is logged)
     */
    @Override
    public V get(Object key) {
        if (!super.containsKey(key)) {
            return null;
        }
        V result;
        try {
            result = valueCache.get((K) key);
        } catch (ExecutionException e) {
            logger.error(String.format("get value with key %s exception: %s", key, e), e);
            result = null;
        }
        return result;
    }

    /**
     * Removes {@code key} from the tree and invalidates its entry in the value cache.
     *
     * @return always {@code null}; the removed value is not looked up
     */
    @Override
    public V remove(Object key) {
        assert !(immutable || !keepAppend) : "Only support remove method with immutable false keepAppend true";
        // Drop the key first, then the cached value.
        super.remove(key);
        valueCache.invalidate(key);
        return null;
    }

    /**
     * Removes all keys from the tree, drops the cached values view and invalidates every
     * entry of the value cache.
     */
    @Override
    public void clear() {
        // Forget the lazily created view so that values() builds a new one.
        this.values = null;
        super.clear();
        this.valueCache.invalidateAll();
    }

    /**
     * Returns a collection view over the values of this map, creating it on first use and
     * reusing it afterwards. The view fetches values through the value cache while iterating.
     */
    @Override
    public Collection<V> values() {
        Collection<V> view = values;
        if (view == null) {
            view = new Values();
            values = view;
        }
        return view;
    }

    /**
     * Collection view backing {@code values()}: its size is the size of the enclosing map and
     * its iterator is a {@code ValueIterator}.
     */
    class Values extends AbstractCollection<V> {
        @Override
        public int size() {
            return CachedTreeMap.this.size();
        }

        @Override
        public Iterator<V> iterator() {
            return new ValueIterator<>();
        }
    }

    /**
     * Iterator that walks the keys of the enclosing map in key-set order and fetches each
     * value through the value cache.
     *
     * @param <T> element type produced by the iterator; values from the cache are cast to it
     */
    class ValueIterator<T> implements Iterator<T> {
        Iterator<K> keyIterator;
        K currentKey;

        public ValueIterator() {
            this.keyIterator = CachedTreeMap.this.keySet().iterator();
        }

        @Override
        public boolean hasNext() {
            return this.keyIterator.hasNext();
        }

        /**
         * Advances to the next key and returns its value, or {@code null} when the cache
         * reports an {@link ExecutionException} (which is logged).
         */
        @Override
        public T next() {
            this.currentKey = this.keyIterator.next();
            T fetched;
            try {
                fetched = (T) valueCache.get(this.currentKey);
            } catch (ExecutionException e) {
                logger.error(String.format("get value with key %s exception: %s", currentKey, e), e);
                fetched = null;
            }
            return fetched;
        }

        /** Removes the current key from the map and invalidates its cached value. */
        @Override
        public void remove() {
            assert !(immutable || !keepAppend) : "Only support remove method with immutable false and keepAppend true";
            this.keyIterator.remove();
            valueCache.invalidate(this.currentKey);
        }
    }

    /**
     * Creates (overwriting any existing file) the {@code .index} file in the current
     * directory and returns a stream for writing it. The caller is responsible for closing
     * the stream.
     *
     * @throws IOException if the file cannot be created
     */
    public FSDataOutputStream openIndexOutput() throws IOException {
        assert keepAppend && !immutable : "Only support write method with immutable false and keepAppend true";
        Path indexPath = new Path(getCurrentDir(), ".index");
        // Same buffer size, replication and block size as the value files; the block size is
        // computed in long arithmetic so it cannot overflow if BUFFER_SIZE ever grows.
        return fs.create(indexPath, true, BUFFER_SIZE, (short) 5, BUFFER_SIZE * 8L);
    }

    /**
     * Opens the {@code .index} file of the current directory for reading. The caller is
     * responsible for closing the stream.
     *
     * @throws IOException if the file cannot be opened
     */
    public FSDataInputStream openIndexInput() throws IOException {
        String currentDir = getCurrentDir();
        return fs.open(new Path(currentDir, ".index"), 8 * 1024 * 1024);
    }

    /**
     * Opens the {@code .index} file of the latest version directory under {@code baseDir}
     * for reading. The caller is responsible for closing the stream.
     *
     * @param conf Hadoop configuration used to resolve the file system
     * @param baseDir base directory of the map
     * @throws IOException if the file system cannot be accessed or the file cannot be opened
     */
    public static FSDataInputStream openLatestIndexInput(Configuration conf, String baseDir) throws IOException {
        Path root = new Path(baseDir);
        FileSystem fileSystem = FileSystem.get(root.toUri(), conf);
        Path latestVersion = getLatestVersion(conf, fileSystem, root);
        return fileSystem.open(new Path(latestVersion, ".index"), 8 * 1024 * 1024);
    }

    /**
     * Writes the number of keys followed by every key to {@code out}. Values are not written
     * to {@code out}; each value currently present in the cache is flushed to its own file.
     */
    @Override
    public void write(DataOutput out) throws IOException {
        out.writeInt(size());
        Iterator<K> keys = keySet().iterator();
        while (keys.hasNext()) {
            K current = keys.next();
            current.write(out);
            V cached = valueCache.getIfPresent(current);
            if (cached != null) {
                writeValue(current, cached);
            }
        }
    }

    /**
     * Reads a key count followed by that many keys from {@code in} and registers each key in
     * the tree with a {@code null} placeholder value.
     *
     * @throws IOException if the count cannot be read, or wrapping any failure while
     *         instantiating, reading or inserting a key
     */
    @Override
    public void readFields(DataInput in) throws IOException {
        int remaining = in.readInt();
        while (remaining > 0) {
            try {
                K loaded = keyClazz.newInstance();
                loaded.readFields(in);
                super.put(loaded, null);
            } catch (Exception e) {
                throw new IOException(e);
            }
            remaining--;
        }
    }
}