gobblin.compliance.HivePartitionVersionFinder.java Source code

Java tutorial

Introduction

Here is the source code for gobblin.compliance.HivePartitionVersionFinder.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package gobblin.compliance;

import java.io.IOException;
import java.security.PrivilegedExceptionAction;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.hive.ql.metadata.Partition;
import org.apache.hadoop.security.UserGroupInformation;

import com.google.common.base.Optional;
import com.google.common.base.Preconditions;

import lombok.extern.slf4j.Slf4j;

import gobblin.compliance.purger.HivePurgerQueryTemplate;
import gobblin.compliance.retention.HivePartitionRetentionVersion;
import gobblin.compliance.utils.ProxyUtils;
import gobblin.configuration.State;
import gobblin.data.management.copy.hive.HiveDataset;
import gobblin.data.management.copy.hive.HiveDatasetFinder;
import gobblin.dataset.Dataset;

/**
 * A version finder class to find {@link HivePartitionVersion}s.
 *
 * <p>Candidate tables are discovered with a {@link HiveDatasetFinder} whose dataset whitelist is taken
 * from {@link ComplianceConfigurationKeys#HIVE_VERSIONS_WHITELIST}. A discovered table is considered
 * only when its table name contains at least one of the configured patterns.
 *
 * @author adsharma
 */
@Slf4j
public class HivePartitionVersionFinder
        implements gobblin.data.management.version.finder.VersionFinder<HivePartitionVersion> {
    protected final FileSystem fs;
    protected final State state;
    protected List<String> patterns;
    // Owner of the dataset passed to the latest findDatasetVersions call; it is only written in this class.
    private Optional<String> owner = Optional.absent();

    /**
     * @param fs file system handed to the {@link HiveDatasetFinder}
     * @param state job configuration; a copy is stored so later changes by the caller are not observed
     * @param patterns substrings a version table name must contain (any one of them) to be considered
     */
    public HivePartitionVersionFinder(FileSystem fs, State state, List<String> patterns) {
        this.fs = fs;
        this.state = new State(state);
        this.patterns = patterns;
    }

    @Override
    public Class<HivePartitionVersion> versionClass() {
        return HivePartitionVersion.class;
    }

    /**
     * Will find all the versions of the {@link HivePartitionDataset}.
     *
     * For a dataset with table name table1, corresponding versions table will be
     * table1_backup_timestamp or table1_staging_timestamp or table1_trash_timestamp
     *
     * Based on pattern, a type of version will be selected eg. backup or trash or staging
     *
     * If a Hive version's table contains no partitions, it is dropped when
     * {@link ComplianceConfigurationKeys#SHOULD_DROP_EMPTY_TABLES} is enabled.
     *
     * Every call returns a freshly built list that contains only the versions found for the given
     * dataset; results of earlier calls are not carried over.
     *
     * @param dataset dataset to find versions for; anything that is not a {@link HivePartitionDataset}
     *                yields an empty collection
     * @return the versions found for the dataset, possibly empty
     * @throws IOException if dataset discovery or the drop-table query fails, or the lookup is interrupted
     * @throws IllegalArgumentException if no patterns are configured or the versions whitelist is missing
     */
    @Override
    public Collection<HivePartitionVersion> findDatasetVersions(Dataset dataset) throws IOException {
        if (!(dataset instanceof HivePartitionDataset)) {
            return new ArrayList<>();
        }
        HivePartitionDataset hivePartitionDataset = (HivePartitionDataset) dataset;
        this.owner = hivePartitionDataset.getOwner();
        Preconditions.checkArgument(!this.patterns.isEmpty(),
                "No patterns to find versions for the dataset " + dataset.datasetURN());

        return findVersions(hivePartitionDataset.getName(), hivePartitionDataset.datasetURN());
    }

    /**
     * Builds the state for the dataset finder (versions whitelist used as dataset whitelist) and
     * collects the versions whose partition name equals {@code name}, ignoring case.
     */
    private List<HivePartitionVersion> findVersions(String name, String urn) throws IOException {
        Preconditions.checkArgument(this.state.contains(ComplianceConfigurationKeys.HIVE_VERSIONS_WHITELIST),
                "Missing required property " + ComplianceConfigurationKeys.HIVE_VERSIONS_WHITELIST);

        // Work on a copy so the whitelist override does not leak into this finder's own state.
        State finderState = new State(this.state);
        finderState.setProp(ComplianceConfigurationKeys.HIVE_DATASET_WHITELIST,
                this.state.getProp(ComplianceConfigurationKeys.HIVE_VERSIONS_WHITELIST));

        List<HivePartitionVersion> found = collectVersions(name, finderState);
        log.info("Found {} versions for the dataset {}", found.size(), urn);
        return found;
    }

    /**
     * Adds a {@link HivePartitionRetentionVersion} for every partition whose name equals {@code name}
     * (case-insensitive). When the table has no partitions at all, it is dropped instead, provided
     * dropping empty tables is enabled in the configuration.
     */
    private void addPartitionsToVersions(List<HivePartitionVersion> versions, String name, HiveDataset hiveDataset,
            List<Partition> partitions) throws IOException {
        if (partitions.isEmpty()) {
            if (Boolean.parseBoolean(this.state.getProp(ComplianceConfigurationKeys.SHOULD_DROP_EMPTY_TABLES,
                    ComplianceConfigurationKeys.DEFAULT_SHOULD_DROP_EMPTY_TABLES))) {
                executeDropTableQuery(hiveDataset);
            }
            return;
        }
        for (Partition partition : partitions) {
            if (partition.getName().equalsIgnoreCase(name)) {
                versions.add(new HivePartitionRetentionVersion(partition));
            }
        }
    }

    /**
     * Issues a drop-table query for the dataset's table, passing the table owner (if any) to the
     * query executor.
     *
     * @throws IOException if the query fails; an {@link SQLException} is wrapped as the cause
     */
    private void executeDropTableQuery(HiveDataset hiveDataset) throws IOException {
        String dbName = hiveDataset.getTable().getDbName();
        String tableName = hiveDataset.getTable().getTableName();
        Optional<String> datasetOwner = Optional.fromNullable(hiveDataset.getTable().getOwner());
        try (HiveProxyQueryExecutor hiveProxyQueryExecutor = ProxyUtils.getQueryExecutor(new State(this.state),
                datasetOwner)) {
            hiveProxyQueryExecutor.executeQuery(HivePurgerQueryTemplate.getDropTableQuery(dbName, tableName),
                    datasetOwner);
        } catch (SQLException e) {
            throw new IOException(e);
        }
    }

    /**
     * Runs dataset discovery as the login user and returns the versions found for {@code name}.
     *
     * Each discovered table is processed at most once, even if its name contains several patterns,
     * and partitions are only fetched for tables that match.
     *
     * @throws IOException if discovery fails, or if the thread is interrupted (the interrupt flag is
     *                     restored before the exception is thrown)
     */
    private List<HivePartitionVersion> collectVersions(final String name, final State state) throws IOException {
        final List<HivePartitionVersion> found = new ArrayList<>();
        try {
            UserGroupInformation loginUser = UserGroupInformation.getLoginUser();
            loginUser.doAs(new PrivilegedExceptionAction<Void>() {
                @Override
                public Void run() throws IOException {
                    HiveDatasetFinder finder = new HiveDatasetFinder(fs, state.getProperties());
                    for (HiveDataset hiveDataset : finder.findDatasets()) {
                        if (matchesAnyPattern(hiveDataset.getTable().getTableName())) {
                            addPartitionsToVersions(found, name, hiveDataset,
                                    hiveDataset.getPartitionsFromDataset());
                        }
                    }
                    return null;
                }
            });
        } catch (InterruptedException e) {
            // Preserve the interrupt for callers further up the stack before converting the exception.
            Thread.currentThread().interrupt();
            throw new IOException(e);
        }
        return found;
    }

    /** Returns true when {@code tableName} contains at least one of the configured patterns. */
    private boolean matchesAnyPattern(String tableName) {
        for (String pattern : this.patterns) {
            if (tableName.contains(pattern)) {
                return true;
            }
        }
        return false;
    }
}