gobblin.metastore.FsStateStore.java Source code

Java tutorial

Introduction

Here is the source code for gobblin.metastore.FsStateStore.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package gobblin.metastore;

import static gobblin.util.HadoopUtils.FS_SCHEMES_NON_ATOMIC;

import java.io.IOException;
import java.net.URI;
import java.util.Collection;
import java.util.Collections;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.compress.DefaultCodec;

import com.google.common.base.Predicate;
import com.google.common.base.Strings;
import com.google.common.collect.Lists;
import com.google.common.io.Closer;

import gobblin.configuration.State;
import gobblin.util.HadoopUtils;
import gobblin.util.WritableShimSerialization;

/**
 * An implementation of {@link StateStore} backed by a {@link FileSystem}.
 *
 * <p>
 *     This implementation uses Hadoop {@link org.apache.hadoop.io.SequenceFile}
 *     to store {@link State}s. Each store maps to one directory, and each
 *     table maps to one file under the store directory. Keys are state IDs
 *     (see {@link State#getId()}), and values are objects of {@link State} or
 *     any of its extensions. Keys will be empty strings if state IDs are not set
 *     (i.e., {@link State#getId()} returns <em>null</em>). In this case, the
 *     {@link FsStateStore#get(String, String, String)} method may not work.
 * </p>
 *
 * @param <T> state object type
 *
 * @author Yinan Li
 */
public class FsStateStore<T extends State> implements StateStore<T> {

    public static final String TMP_FILE_PREFIX = "_tmp_";

    protected final Configuration conf;
    protected final FileSystem fs;
    protected boolean useTmpFileForPut;

    // Root directory for the task state store
    protected final String storeRootDir;

    // Class of the state objects to be put into the store
    private final Class<T> stateClass;

    public FsStateStore(String fsUri, String storeRootDir, Class<T> stateClass) throws IOException {
        this.conf = getConf(null);
        this.fs = FileSystem.get(URI.create(fsUri), this.conf);
        this.useTmpFileForPut = !FS_SCHEMES_NON_ATOMIC.contains(this.fs.getUri().getScheme());
        this.storeRootDir = storeRootDir;
        this.stateClass = stateClass;
    }

    /**
     * Get a Hadoop configuration that understands how to (de)serialize WritableShim objects.
     */
    private Configuration getConf(Configuration otherConf) {
        Configuration conf;
        if (otherConf == null) {
            conf = new Configuration();
        } else {
            conf = new Configuration(otherConf);
        }

        WritableShimSerialization.addToHadoopConfiguration(conf);

        return conf;
    }

    public FsStateStore(FileSystem fs, String storeRootDir, Class<T> stateClass) {
        this.fs = fs;
        this.useTmpFileForPut = !FS_SCHEMES_NON_ATOMIC.contains(this.fs.getUri().getScheme());
        this.conf = getConf(this.fs.getConf());
        this.storeRootDir = storeRootDir;
        this.stateClass = stateClass;
    }

    public FsStateStore(String storeUrl, Class<T> stateClass) throws IOException {
        this.conf = getConf(null);
        Path storePath = new Path(storeUrl);
        this.fs = storePath.getFileSystem(this.conf);
        this.useTmpFileForPut = !FS_SCHEMES_NON_ATOMIC.contains(this.fs.getUri().getScheme());
        this.storeRootDir = storePath.toUri().getPath();
        this.stateClass = stateClass;
    }

    @Override
    public boolean create(String storeName) throws IOException {
        Path storePath = new Path(this.storeRootDir, storeName);
        return this.fs.exists(storePath) || this.fs.mkdirs(storePath, new FsPermission((short) 0755));
    }

    @Override
    public boolean create(String storeName, String tableName) throws IOException {
        Path storePath = new Path(this.storeRootDir, storeName);
        if (!this.fs.exists(storePath) && !create(storeName)) {
            return false;
        }

        Path tablePath = new Path(storePath, tableName);
        if (this.fs.exists(tablePath)) {
            throw new IOException(String.format("State file %s already exists for table %s", tablePath, tableName));
        }

        return this.fs.createNewFile(tablePath);
    }

    @Override
    public boolean exists(String storeName, String tableName) throws IOException {
        Path tablePath = new Path(new Path(this.storeRootDir, storeName), tableName);
        return this.fs.exists(tablePath);
    }

    /**
     * See {@link StateStore#put(String, String, T)}.
     *
     * <p>
     *   This implementation does not support putting the state object into an existing store as
     *   append is to be supported by the Hadoop SequenceFile (HADOOP-7139).
     * </p>
     */
    @Override
    public void put(String storeName, String tableName, T state) throws IOException {
        String tmpTableName = this.useTmpFileForPut ? TMP_FILE_PREFIX + tableName : tableName;
        Path tmpTablePath = new Path(new Path(this.storeRootDir, storeName), tmpTableName);

        if (!this.fs.exists(tmpTablePath) && !create(storeName, tmpTableName)) {
            throw new IOException("Failed to create a state file for table " + tmpTableName);
        }

        Closer closer = Closer.create();
        try {
            @SuppressWarnings("deprecation")
            SequenceFile.Writer writer = closer.register(SequenceFile.createWriter(this.fs, this.conf, tmpTablePath,
                    Text.class, this.stateClass, SequenceFile.CompressionType.BLOCK, new DefaultCodec()));
            writer.append(new Text(Strings.nullToEmpty(state.getId())), state);
        } catch (Throwable t) {
            throw closer.rethrow(t);
        } finally {
            closer.close();
        }

        if (this.useTmpFileForPut) {
            Path tablePath = new Path(new Path(this.storeRootDir, storeName), tableName);
            HadoopUtils.renamePath(this.fs, tmpTablePath, tablePath);
        }
    }

    /**
     * See {@link StateStore#putAll(String, String, Collection)}.
     *
     * <p>
     *   This implementation does not support putting the state objects into an existing store as
     *   append is to be supported by the Hadoop SequenceFile (HADOOP-7139).
     * </p>
     */
    @Override
    public void putAll(String storeName, String tableName, Collection<T> states) throws IOException {
        String tmpTableName = this.useTmpFileForPut ? TMP_FILE_PREFIX + tableName : tableName;
        Path tmpTablePath = new Path(new Path(this.storeRootDir, storeName), tmpTableName);

        if (!this.fs.exists(tmpTablePath) && !create(storeName, tmpTableName)) {
            throw new IOException("Failed to create a state file for table " + tmpTableName);
        }

        Closer closer = Closer.create();
        try {
            @SuppressWarnings("deprecation")
            SequenceFile.Writer writer = closer.register(SequenceFile.createWriter(this.fs, this.conf, tmpTablePath,
                    Text.class, this.stateClass, SequenceFile.CompressionType.BLOCK, new DefaultCodec()));
            for (T state : states) {
                writer.append(new Text(Strings.nullToEmpty(state.getId())), state);
            }
        } catch (Throwable t) {
            throw closer.rethrow(t);
        } finally {
            closer.close();
        }

        if (this.useTmpFileForPut) {
            Path tablePath = new Path(new Path(this.storeRootDir, storeName), tableName);
            HadoopUtils.renamePath(this.fs, tmpTablePath, tablePath);
        }
    }

    @Override
    @SuppressWarnings("unchecked")
    public T get(String storeName, String tableName, String stateId) throws IOException {
        Path tablePath = new Path(new Path(this.storeRootDir, storeName), tableName);
        if (!this.fs.exists(tablePath)) {
            return null;
        }

        Closer closer = Closer.create();
        try {
            @SuppressWarnings("deprecation")
            SequenceFile.Reader reader = closer.register(new SequenceFile.Reader(this.fs, tablePath, this.conf));
            try {
                Text key = new Text();
                T state = this.stateClass.newInstance();
                while (reader.next(key)) {
                    state = (T) reader.getCurrentValue(state);
                    if (key.toString().equals(stateId)) {
                        return state;
                    }
                }
            } catch (Exception e) {
                throw new IOException(
                        "failure retrieving state from storeName " + storeName + " tableName " + tableName, e);
            }
        } catch (Throwable t) {
            throw closer.rethrow(t);
        } finally {
            closer.close();
        }

        return null;
    }

    @Override
    @SuppressWarnings("unchecked")
    public List<T> getAll(String storeName, String tableName) throws IOException {
        List<T> states = Lists.newArrayList();

        Path tablePath = new Path(new Path(this.storeRootDir, storeName), tableName);
        if (!this.fs.exists(tablePath)) {
            return states;
        }

        Closer closer = Closer.create();
        try {
            @SuppressWarnings("deprecation")
            SequenceFile.Reader reader = closer.register(new SequenceFile.Reader(this.fs, tablePath, this.conf));
            try {
                Text key = new Text();
                T state = this.stateClass.newInstance();
                while (reader.next(key)) {
                    state = (T) reader.getCurrentValue(state);
                    states.add(state);
                    // We need a new object for each read state
                    state = this.stateClass.newInstance();
                }
            } catch (Exception e) {
                throw new IOException(
                        "failure retrieving state from storeName " + storeName + " tableName " + tableName, e);
            }
        } catch (Throwable t) {
            throw closer.rethrow(t);
        } finally {
            closer.close();
        }

        return states;
    }

    @Override
    public List<T> getAll(String storeName) throws IOException {
        List<T> states = Lists.newArrayList();

        Path storePath = new Path(this.storeRootDir, storeName);
        if (!this.fs.exists(storePath)) {
            return states;
        }

        for (FileStatus status : this.fs.listStatus(storePath)) {
            states.addAll(getAll(storeName, status.getPath().getName()));
        }

        return states;
    }

    @Override
    public List<String> getTableNames(String storeName, Predicate<String> predicate) throws IOException {
        List<String> names = Lists.newArrayList();

        Path storePath = new Path(this.storeRootDir, storeName);
        if (!this.fs.exists(storePath)) {
            return names;
        }

        for (FileStatus status : this.fs.listStatus(storePath)) {
            if (predicate.apply(status.getPath().getName())) {
                names.add(status.getPath().getName());
            }
        }

        return names;
    }

    @Override
    public void createAlias(String storeName, String original, String alias) throws IOException {
        Path originalTablePath = new Path(new Path(this.storeRootDir, storeName), original);
        if (!this.fs.exists(originalTablePath)) {
            throw new IOException(
                    String.format("State file %s does not exist for table %s", originalTablePath, original));
        }

        Path aliasTablePath = new Path(new Path(this.storeRootDir, storeName), alias);
        Path tmpAliasTablePath = new Path(aliasTablePath.getParent(),
                new Path(TMP_FILE_PREFIX, aliasTablePath.getName()));
        // Make a copy of the original table as a work-around because
        // Hadoop version 1.2.1 has no support for symlink yet.
        HadoopUtils.copyFile(this.fs, originalTablePath, this.fs, aliasTablePath, tmpAliasTablePath, true,
                this.conf);
    }

    @Override
    public void delete(String storeName, String tableName) throws IOException {
        Path tablePath = new Path(new Path(this.storeRootDir, storeName), tableName);
        if (this.fs.exists(tablePath)) {
            this.fs.delete(tablePath, false);
        }
    }

    @Override
    public void delete(String storeName) throws IOException {
        Path storePath = new Path(this.storeRootDir, storeName);
        if (this.fs.exists(storePath)) {
            this.fs.delete(storePath, true);
        }
    }
}