org.apache.gobblin.runtime.spec_store.FSSpecStore.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.gobblin.runtime.spec_store.FSSpecStore.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.gobblin.runtime.spec_store;

import com.google.common.base.Optional;
import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;
import com.google.common.io.ByteStreams;
import com.google.common.io.Files;
import com.typesafe.config.Config;
import java.io.IOException;
import java.net.URI;
import java.util.Collection;
import java.util.Iterator;
import org.apache.gobblin.runtime.api.FlowSpec;
import org.apache.gobblin.runtime.api.GobblinInstanceEnvironment;
import org.apache.gobblin.runtime.api.Spec;
import org.apache.gobblin.runtime.api.SpecNotFoundException;
import org.apache.gobblin.runtime.api.SpecSerDe;
import org.apache.gobblin.runtime.api.SpecStore;
import org.apache.gobblin.util.PathUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * The Spec Store for file system to persist the Spec information.
 * Note:
 * 1. This implementation has no support for caching.
 * 2. This implementation does not perform implicit version management.
 *    For implicit version management, please use a wrapper FSSpecStore.
 */
public class FSSpecStore implements SpecStore {

    /***
     * Configuration properties related to Spec Store
     */
    public static final String SPECSTORE_FS_DIR_KEY = "specStore.fs.dir";

    protected final Logger log;
    protected final Config sysConfig;
    protected final FileSystem fs;
    protected final String fsSpecStoreDir;
    protected final Path fsSpecStoreDirPath;
    protected final SpecSerDe specSerDe;

    /**
     * Creates a store from the system config of the given environment, using a default logger.
     *
     * @param env environment whose system config supplies {@link #SPECSTORE_FS_DIR_KEY}.
     * @param specSerDe serializer/deserializer used to read and write {@link Spec}s.
     * @throws IOException if checking for or creating the store directory fails.
     */
    public FSSpecStore(GobblinInstanceEnvironment env, SpecSerDe specSerDe) throws IOException {
        this(env.getSysConfig().getConfig(), specSerDe, Optional.<Logger>absent());
    }

    /**
     * Creates a store from the given config, using a default logger.
     *
     * @param sysConfig config that must contain {@link #SPECSTORE_FS_DIR_KEY}.
     * @param specSerDe serializer/deserializer used to read and write {@link Spec}s.
     * @throws IOException if checking for or creating the store directory fails.
     */
    public FSSpecStore(Config sysConfig, SpecSerDe specSerDe) throws IOException {
        this(sysConfig, specSerDe, Optional.<Logger>absent());
    }

    /**
     * Creates a store from the system config of the given environment.
     *
     * @param env environment whose system config supplies {@link #SPECSTORE_FS_DIR_KEY}.
     * @param specSerDe serializer/deserializer used to read and write {@link Spec}s.
     * @param log logger to use; when absent a logger for the runtime class is created.
     * @throws IOException if checking for or creating the store directory fails.
     */
    public FSSpecStore(GobblinInstanceEnvironment env, SpecSerDe specSerDe, Optional<Logger> log)
            throws IOException {
        this(env.getSysConfig().getConfig(), specSerDe, log);
    }

    /**
     * Creates a store rooted at the directory configured under {@link #SPECSTORE_FS_DIR_KEY},
     * creating that directory if it does not exist yet.
     *
     * @param sysConfig config that must contain {@link #SPECSTORE_FS_DIR_KEY}.
     * @param specSerDe serializer/deserializer used to read and write {@link Spec}s.
     * @param log logger to use; when absent a logger for the runtime class is created.
     * @throws IllegalArgumentException if the directory key is missing from {@code sysConfig}.
     * @throws RuntimeException if the file system for the directory cannot be resolved.
     * @throws IOException if checking for or creating the store directory fails.
     */
    public FSSpecStore(Config sysConfig, SpecSerDe specSerDe, Optional<Logger> log) throws IOException {
        Preconditions.checkArgument(sysConfig.hasPath(SPECSTORE_FS_DIR_KEY),
                "FS SpecStore path must be specified.");

        this.log = log.isPresent() ? log.get() : LoggerFactory.getLogger(getClass());
        this.sysConfig = sysConfig;
        this.specSerDe = specSerDe;
        this.fsSpecStoreDir = this.sysConfig.getString(SPECSTORE_FS_DIR_KEY);
        this.fsSpecStoreDirPath = new Path(this.fsSpecStoreDir);
        this.log.info("FSSpecStore directory is: " + this.fsSpecStoreDir);
        try {
            this.fs = this.fsSpecStoreDirPath.getFileSystem(new Configuration());
        } catch (IOException e) {
            // Kept as an unchecked exception: existing callers may rely on this type.
            throw new RuntimeException("Unable to detect job config directory file system: " + e, e);
        }
        if (!this.fs.exists(this.fsSpecStoreDirPath)) {
            this.log.info("FSSpecStore directory: " + this.fsSpecStoreDir + " did not exist. Creating it.");
            this.fs.mkdirs(this.fsSpecStoreDirPath);
        }
    }

    /**
     * Returns the name of the parent directory of the given spec path.
     *
     * @param specUri path of the spec
     * @return empty string for topology spec, as topologies do not have a group,
     *         group name for flow spec
     */
    public static String getSpecGroup(Path specUri) {
        return specUri.getParent().getName();
    }

    /**
     * Returns the file name of the given spec path with its extension removed.
     *
     * @param specUri path of the spec
     * @return the last path component without its extension.
     */
    public static String getSpecName(Path specUri) {
        return Files.getNameWithoutExtension(specUri.getName());
    }

    /**
     * Reads the single spec stored at the given file path.
     *
     * @param spec file path of the serialized spec.
     * @return a collection holding the spec, or an empty collection if it could not be read.
     */
    private Collection<Spec> getAllVersionsOfSpec(Path spec) {
        Collection<Spec> specs = Lists.newArrayList();

        try {
            specs.add(readSpecFromFile(spec));
        } catch (IOException e) {
            // Best effort: an unreadable or missing file yields an empty result.
            log.warn("Spec {} not found.", spec);
        }
        return specs;
    }

    /**
     * Returns all versions of the spec defined by specUri.
     * Currently, multiple versions are not supported, so this should return exactly one spec.
     * @param specUri URI for the {@link Spec} to be retrieved.
     * @return all versions of the spec.
     */
    @Override
    public Collection<Spec> getAllVersionsOfSpec(URI specUri) {
        Preconditions.checkArgument(null != specUri, "Spec URI should not be null");
        Path specPath = getPathForURI(this.fsSpecStoreDirPath, specUri, FlowSpec.Builder.DEFAULT_VERSION);
        return getAllVersionsOfSpec(specPath);
    }

    /**
     * Checks whether a file exists for the given spec URI at the default version.
     *
     * @param specUri URI of the spec to look up.
     * @return {@code true} if the corresponding file exists.
     * @throws IOException if the file system check fails.
     */
    @Override
    public boolean exists(URI specUri) throws IOException {
        Preconditions.checkArgument(null != specUri, "Spec URI should not be null");

        Path specPath = getPathForURI(this.fsSpecStoreDirPath, specUri, FlowSpec.Builder.DEFAULT_VERSION);
        return fs.exists(specPath);
    }

    /**
     * Serializes the spec and writes it to the file derived from its URI and version,
     * overwriting any existing file.
     *
     * @param spec spec to persist.
     * @throws IOException if writing the file fails.
     */
    @Override
    public void addSpec(Spec spec) throws IOException {
        Preconditions.checkArgument(null != spec, "Spec should not be null");

        log.info("Adding Spec with URI: {} in FSSpecStore: {}", spec.getUri(), this.fsSpecStoreDirPath);
        Path specPath = getPathForURI(this.fsSpecStoreDirPath, spec.getUri(), spec.getVersion());
        writeSpecToFile(specPath, spec);
    }

    /**
     * Deletes the file for the given spec, using the spec's own URI and version.
     *
     * @param spec spec to delete.
     * @return the result of the underlying file system delete.
     * @throws IOException if the delete fails.
     */
    @Override
    public boolean deleteSpec(Spec spec) throws IOException {
        Preconditions.checkArgument(null != spec, "Spec should not be null");

        return deleteSpec(spec.getUri(), spec.getVersion());
    }

    /**
     * Deletes the file for the given spec URI at the default version.
     *
     * @param specUri URI of the spec to delete.
     * @return the result of the underlying file system delete.
     * @throws IOException if the delete fails.
     */
    @Override
    public boolean deleteSpec(URI specUri) throws IOException {
        Preconditions.checkArgument(null != specUri, "Spec URI should not be null");

        return deleteSpec(specUri, FlowSpec.Builder.DEFAULT_VERSION);
    }

    /**
     * Deletes the file for the given spec URI and version (non-recursive delete).
     *
     * @param specUri URI of the spec to delete.
     * @param version version of the spec to delete.
     * @return the result of the underlying file system delete.
     * @throws IOException if the delete fails; the original failure is attached as the cause.
     */
    @Override
    public boolean deleteSpec(URI specUri, String version) throws IOException {
        Preconditions.checkArgument(null != specUri, "Spec URI should not be null");
        Preconditions.checkArgument(null != version, "Version should not be null");

        try {
            log.info("Deleting Spec with URI: {} in FSSpecStore: {}", specUri, this.fsSpecStoreDirPath);
            Path specPath = getPathForURI(this.fsSpecStoreDirPath, specUri, version);
            return fs.delete(specPath, false);
        } catch (IOException e) {
            throw new IOException(String.format("Issue in removing Spec: %s for Version: %s", specUri, version), e);
        }
    }

    /**
     * Updates a spec by writing it again via {@link #addSpec(Spec)}, which overwrites the existing file.
     *
     * @param spec spec to persist.
     * @return the same spec instance that was passed in.
     * @throws IOException if writing the file fails.
     * @throws SpecNotFoundException declared by the interface; not thrown by this implementation.
     */
    @Override
    public Spec updateSpec(Spec spec) throws IOException, SpecNotFoundException {
        addSpec(spec);
        return spec;
    }

    /**
     * Returns the spec with the highest version among all stored versions for the given URI.
     * Versions are compared as strings; a spec with a null version never replaces the current
     * best, and any non-null version outranks a null one.
     *
     * @param specUri URI of the spec to retrieve.
     * @return the highest-version spec.
     * @throws SpecNotFoundException if no spec could be read for the URI.
     */
    @Override
    public Spec getSpec(URI specUri) throws SpecNotFoundException {
        Preconditions.checkArgument(null != specUri, "Spec URI should not be null");

        Collection<Spec> specs = getAllVersionsOfSpec(specUri);
        Spec highestVersionSpec = null;

        for (Spec spec : specs) {
            if (null == highestVersionSpec) {
                highestVersionSpec = spec;
                continue;
            }
            // Compare the candidate against the current best (not against itself).
            String candidateVersion = spec.getVersion();
            String bestVersion = highestVersionSpec.getVersion();
            if (null != candidateVersion
                    && (null == bestVersion || candidateVersion.compareTo(bestVersion) > 0)) {
                highestVersionSpec = spec;
            }
        }

        if (null == highestVersionSpec) {
            throw new SpecNotFoundException(specUri);
        }

        return highestVersionSpec;
    }

    /**
     * Returns the spec stored for the given URI and version.
     *
     * @param specUri URI of the spec to retrieve.
     * @param version version of the spec to retrieve.
     * @return the deserialized spec.
     * @throws IOException if the existence check or reading the file fails.
     * @throws SpecNotFoundException if no file exists for the URI and version.
     */
    @Override
    public Spec getSpec(URI specUri, String version) throws IOException, SpecNotFoundException {
        Preconditions.checkArgument(null != specUri, "Spec URI should not be null");
        Preconditions.checkArgument(null != version, "Version should not be null");

        Path specPath = getPathForURI(this.fsSpecStoreDirPath, specUri, version);

        if (!fs.exists(specPath)) {
            throw new SpecNotFoundException(specUri);
        }

        return readSpecFromFile(specPath);
    }

    /**
     * Loads every spec found under the store directory, recursing into sub-directories.
     * Files that cannot be deserialized are skipped with a warning.
     *
     * @return all specs that could be read.
     * @throws IOException wrapping any failure raised while listing the directory tree.
     */
    @Override
    public Collection<Spec> getSpecs() throws IOException {
        Collection<Spec> specs = Lists.newArrayList();
        try {
            getSpecs(this.fsSpecStoreDirPath, specs);
        } catch (Exception e) {
            throw new IOException(e);
        }

        return specs;
    }

    /**
     * Returns an iterator over the URIs of all files under the store directory (recursive listing).
     * Failures raised by the underlying listing while iterating surface as {@link RuntimeException}.
     *
     * @return iterator of spec URIs derived from file paths relative to the store directory.
     * @throws IOException if the initial listing fails.
     */
    @Override
    public Iterator<URI> getSpecURIs() throws IOException {
        final RemoteIterator<LocatedFileStatus> it = fs.listFiles(this.fsSpecStoreDirPath, true);
        return new Iterator<URI>() {
            @Override
            public boolean hasNext() {
                try {
                    return it.hasNext();
                } catch (IOException ioe) {
                    throw new RuntimeException("Failed to determine if there's next element available due to:",
                            ioe);
                }
            }

            @Override
            public URI next() {
                try {
                    return getURIFromPath(it.next().getPath(), fsSpecStoreDirPath);
                } catch (IOException ioe) {
                    throw new RuntimeException("Failed to fetch next element due to:", ioe);
                }
            }
        };
    }

    /**
     * Not supported by this implementation.
     *
     * @throws UnsupportedOperationException always.
     */
    @Override
    public Iterator<URI> getSpecURIsWithTag(String tag) throws IOException {
        throw new UnsupportedOperationException(
                "Loading specs with tag is not supported in FS-Implementation of SpecStore");
    }

    /**
     * Returns the URI of the store directory.
     *
     * @return the store directory path as a URI; always present.
     */
    @Override
    public Optional<URI> getSpecStoreURI() {
        return Optional.of(this.fsSpecStoreDirPath.toUri());
    }

    /**
     * For multiple {@link FlowSpec}s to be loaded, catch Exceptions when one of them failed to be loaded and
     * continue with the rest.
     *
     * The {@link IOException} thrown from standard FileSystem call will be propagated, while the file-specific
     * exception will be caught to ensure other files being able to deserialized.
     *
     * @param directory The directory that contains specs to be deserialized
     * @param specs Container of specs.
     * @throws IOException if listing a directory fails.
     */
    private void getSpecs(Path directory, Collection<Spec> specs) throws IOException {
        FileStatus[] fileStatuses = fs.listStatus(directory);
        for (FileStatus fileStatus : fileStatuses) {
            if (fileStatus.isDirectory()) {
                getSpecs(fileStatus.getPath(), specs);
            } else {
                try {
                    specs.add(readSpecFromFile(fileStatus.getPath()));
                } catch (Exception e) {
                    // Deliberately broad: one bad file must not prevent loading the others.
                    log.warn("Path[{}] cannot be correctly deserialized as Spec", fileStatus.getPath(), e);
                }
            }
        }
    }

    /***
     * Read and deserialized Spec from a file.
     * @param path File containing serialized Spec.
     * @return Spec
     * @throws IOException if opening or reading the file fails.
     */
    protected Spec readSpecFromFile(Path path) throws IOException {
        Spec spec;

        try (FSDataInputStream fis = fs.open(path)) {
            spec = this.specSerDe.deserialize(ByteStreams.toByteArray(fis));
        }

        return spec;
    }

    /***
     * Serialize and write Spec to a file, overwriting the file if it already exists.
     * @param specPath Spec file name.
     * @param spec Spec object to write.
     * @throws IOException if creating or writing the file fails.
     */
    protected void writeSpecToFile(Path specPath, Spec spec) throws IOException {
        // Serialize before opening the stream so a serialization failure does not leave an empty file.
        byte[] serializedSpec = this.specSerDe.serialize(spec);
        try (FSDataOutputStream os = fs.create(specPath, true)) {
            os.write(serializedSpec);
        }
    }

    /**
     * Construct a file path given URI and version of a spec.
     *
     * @param fsSpecStoreDirPath The directory path for specs.
     * @param uri Uri as the identifier of JobSpec
     * @param version Version of the spec, appended to the path as an extension.
     * @return the path built by merging the store directory with the URI and adding the version extension.
     */
    protected Path getPathForURI(Path fsSpecStoreDirPath, URI uri, String version) {
        return PathUtils.addExtension(PathUtils.mergePaths(fsSpecStoreDirPath, new Path(uri)), version);
    }

    /**
     * Recover {@link Spec}'s URI from a file path.
     * Note that there's no version awareness of this method, as Spec's version is currently not supported.
     *
     * @param fsPath The given file path to get URI from.
     * @param fsSpecStoreDirPath The store directory the path is made relative to.
     * @return The exact URI of a Spec.
     */
    protected URI getURIFromPath(Path fsPath, Path fsSpecStoreDirPath) {
        return PathUtils.relativizePath(fsPath, fsSpecStoreDirPath).toUri();
    }
}