com.qubole.rubix.core.CachingFileSystem.java Source code

Java tutorial

Introduction

Here is the source code for com.qubole.rubix.core.CachingFileSystem.java

Source

/**
 * Copyright (c) 2016. Qubole Inc
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * http://www.apache.org/licenses/LICENSE-2.0
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License. See accompanying LICENSE file.
 */
package com.qubole.rubix.core;

import com.google.common.base.Charsets;
import com.google.common.base.Throwables;
import com.google.common.hash.HashCode;
import com.google.common.hash.HashFunction;
import com.google.common.hash.Hashing;
import com.qubole.rubix.spi.CacheConfig;
import com.qubole.rubix.spi.ClusterManager;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.BufferedFSInputStream;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.util.Progressable;
import org.weakref.jmx.MBeanExporter;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.lang.management.ManagementFactory;
import java.lang.reflect.ParameterizedType;
import java.lang.reflect.Type;
import java.net.URI;
import java.util.List;

import static com.qubole.rubix.spi.CacheConfig.skipCache;

/**
 * Created by stagra on 29/12/15.
 */
/**
 * A {@link FileSystem} wrapper that delegates every operation to an underlying
 * filesystem of type {@code T}, but routes reads opened via {@link #open}
 * through a local block cache ({@link CachingInputStream}). On the cluster
 * master, {@link #getFileBlockLocations} rewrites block locations using
 * consistent hashing over the cluster's node list so that splits are scheduled
 * on the node expected to hold the cached data.
 */
public abstract class CachingFileSystem<T extends FileSystem> extends FileSystem {
    private static final Log log = LogFactory.getLog(CachingFileSystem.class);

    // MD5 here is used only for stable cache placement, not for security.
    // HashFunction is stateless and thread-safe, so share a single instance
    // instead of constructing one per block inside the location loop.
    private static final HashFunction HASH_FUNCTION = Hashing.md5();

    private T fs = null;
    private ClusterManager clusterManager;

    // Set when open() bypassed the cache for some path; while true, block
    // locations fall back to the underlying filesystem.
    // NOTE(review): this flag is per-FileSystem-instance, shared by all streams
    // opened through it, and written without synchronization — confirm callers
    // use a dedicated instance per query/task.
    private boolean cacheSkipped = false;

    private static CachingFileSystemStats statsMBean;

    static {
        // Register cache statistics as a JMX MBean once per JVM.
        MBeanExporter exporter = new MBeanExporter(ManagementFactory.getPlatformMBeanServer());
        statsMBean = new CachingFileSystemStats();
        exporter.export("rubix:name=stats", statsMBean);
    }

    // this magic is necessary to create an instance of type T
    // NOTE(review): assumes the concrete subclass directly parameterizes
    // CachingFileSystem (getGenericSuperclass() must be a ParameterizedType);
    // a raw or indirect subclass would fail the cast here.
    @SuppressWarnings("unchecked")
    private Class<T> getTypeParameterClass() {
        Type type = getClass().getGenericSuperclass();
        ParameterizedType paramType = (ParameterizedType) type;
        return (Class<T>) paramType.getActualTypeArguments()[0];
    }

    /**
     * Creates the wrapper and reflectively instantiates the underlying
     * filesystem {@code T} via its no-arg constructor.
     */
    public CachingFileSystem() {
        try {
            this.fs = getTypeParameterClass().newInstance();
        } catch (InstantiationException | IllegalAccessException e) {
            log.error("cannot instantiate base filesystem ", e);
            // Rethrow as unchecked; the explicit 'throw' makes it clear to
            // readers (and the compiler) that this branch never falls through.
            throw Throwables.propagate(e);
        }
    }

    /**
     * Injects the cluster manager used for split sizing and block placement.
     * Must be called before {@link #initialize}.
     */
    public void setClusterManager(ClusterManager clusterManager) {
        this.clusterManager = clusterManager;
    }

    /**
     * Initializes both this wrapper and the underlying filesystem.
     *
     * @throws IOException if no cluster manager has been set
     */
    @Override
    public void initialize(URI uri, Configuration conf) throws IOException {
        if (clusterManager == null) {
            throw new IOException("Cluster Manager not set");
        }
        super.initialize(uri, conf);
        fs.initialize(uri, conf);
    }

    @Override
    public URI getUri() {
        return fs.getUri();
    }

    /**
     * Opens {@code path} for reading. Unless caching is disabled for this path
     * (per {@link CacheConfig#skipCache}), the returned stream reads through
     * the local block cache.
     */
    @Override
    public FSDataInputStream open(Path path, int bufferSize) throws IOException {
        FSDataInputStream inputStream = fs.open(path, bufferSize);

        if (skipCache(path, getConf())) {
            cacheSkipped = true;
            return inputStream;
        }

        return new FSDataInputStream(new BufferedFSInputStream(
                new CachingInputStream(inputStream, this, path, this.getConf(), statsMBean,
                        clusterManager.getSplitSize(), clusterManager.getClusterType()),
                CacheConfig.getBlockSize(getConf())));
    }

    @Override
    public FSDataOutputStream create(Path path, FsPermission fsPermission, boolean b, int i, short i1, long l,
            Progressable progressable) throws IOException {
        //CachingInputStream.invalidate(path);
        return fs.create(path, fsPermission, b, i, i1, l, progressable);
    }

    @Override
    public FSDataOutputStream append(Path path, int i, Progressable progressable) throws IOException {
        //CachingInputStream.invalidate(path);
        return fs.append(path, i, progressable);
    }

    @Override
    public boolean rename(Path path, Path path1) throws IOException {
        //CachingInputStream.invalidate(path);
        //CachingInputStream.invalidate(path1);
        return fs.rename(path, path1);
    }

    @Override
    public boolean delete(Path path) throws IOException {
        // TODO: Support directory invalidation
        // When we do support caching file listings, we should invalidate that cache here
        // Invalidation in that cache will be responsible to call CachingInputStream.invalidate()
        // for each file listing in the path
        //CachingInputStream.invalidate(path);
        return fs.delete(path);
    }

    @Override
    public boolean delete(Path path, boolean b) throws IOException {
        // TODO: Support directory invalidation, same as in delete(Path)
        //CachingInputStream.invalidate(path);
        return fs.delete(path, b);
    }

    @Override
    public FileStatus[] listStatus(Path path) throws FileNotFoundException, IOException {
        return fs.listStatus(path);
    }

    @Override
    public void setWorkingDirectory(Path path) {
        fs.setWorkingDirectory(path);
    }

    @Override
    public Path getWorkingDirectory() {
        return fs.getWorkingDirectory();
    }

    @Override
    public boolean mkdirs(Path path, FsPermission fsPermission) throws IOException {
        return fs.mkdirs(path, fsPermission);
    }

    @Override
    public FileStatus getFileStatus(Path path) throws IOException {
        return fs.getFileStatus(path);
    }

    /**
     * Returns block locations for {@code file}, rewritten so each split-sized
     * block is reported on the node chosen by consistent hashing — i.e. the
     * node expected to hold its cached copy. Delegates to the underlying
     * filesystem on worker nodes, when caching was skipped, or when no node
     * list is available.
     *
     * Mirrors {@code FileSystem.getFileBlockLocations}: locations for ALL
     * blocks of the file are returned, not only those overlapping
     * [start, start + len).
     *
     * @return {@code null} if {@code file} is null, an empty array if
     *         {@code start} is past end of file
     * @throws IllegalArgumentException if {@code start} or {@code len} is negative
     */
    @Override
    public BlockLocation[] getFileBlockLocations(FileStatus file, long start, long len) throws IOException {
        if (file == null) {
            return null;
        }
        if (start < 0L || len < 0L) {
            throw new IllegalArgumentException("Invalid start or len parameter");
        }

        if (!clusterManager.isMaster() || cacheSkipped) {
            // If in worker node, blockLocation does not matter
            return fs.getFileBlockLocations(file, start, len);
        }

        List<String> nodes = clusterManager.getNodes();
        if (nodes == null || nodes.isEmpty()) {
            // No usable node list yet (e.g. cluster manager still warming up):
            // fall back rather than fail inside consistentHash with 0 buckets.
            return fs.getFileBlockLocations(file, start, len);
        }

        if (file.getLen() < start) {
            return new BlockLocation[0];
        }

        // Split size is invariant for the whole file; hoist out of the loop.
        long splitSize = clusterManager.getSplitSize();
        BlockLocation[] blockLocations =
                new BlockLocation[(int) Math.ceil((double) file.getLen() / splitSize)];
        int blockNumber = 0;
        for (long i = 0; i < file.getLen(); i = i + splitSize) {
            long end = Math.min(i + splitSize, file.getLen());
            // NOTE(review): concatenating offsets with no separator can collide
            // ("1"+"12" == "11"+"2"); kept as-is so block placement stays
            // consistent with existing deployments.
            String key = file.getPath().toString() + i + end;
            HashCode hc = HASH_FUNCTION.hashString(key, Charsets.UTF_8);
            int nodeIndex = Hashing.consistentHash(hc, nodes.size());
            String[] name = new String[] { nodes.get(nodeIndex) };
            String[] host = new String[] { nodes.get(nodeIndex) };
            blockLocations[blockNumber++] = new BlockLocation(name, host, i, end - i);
            log.info(String.format("BlockLocation %s %d %d %s totalHosts: %s", file.getPath().toString(), i,
                    end - i, host[0], nodes.size()));
        }

        return blockLocations;
    }
}