gobblin.data.management.version.finder.AbstractHiveDatasetVersionFinder.java Source code

Java tutorial

Introduction

Here is the source code for gobblin.data.management.version.finder.AbstractHiveDatasetVersionFinder.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package gobblin.data.management.version.finder;

import java.io.IOException;
import java.util.Collection;
import java.util.List;

import lombok.extern.slf4j.Slf4j;

import org.apache.hadoop.hive.metastore.IMetaStoreClient;
import org.apache.hadoop.hive.ql.metadata.Partition;

import com.google.common.base.Function;
import com.google.common.base.Optional;
import com.google.common.base.Predicates;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;

import gobblin.data.management.copy.hive.HiveDataset;
import gobblin.data.management.copy.hive.HiveUtils;
import gobblin.data.management.version.DatasetVersion;
import gobblin.data.management.version.HiveDatasetVersion;
import gobblin.dataset.Dataset;
import gobblin.util.AutoReturnableObject;

/**
 * Base {@link VersionFinder} that yields one {@link HiveDatasetVersion} per {@link Partition} of a
 * {@link HiveDataset}. Subclasses decide how a partition maps to a version by implementing
 * {@link #getDatasetVersion(Partition)}.
 */
@Slf4j
public abstract class AbstractHiveDatasetVersionFinder implements VersionFinder<HiveDatasetVersion> {

    /**
     * @return {@link HiveDatasetVersion}, the version type produced by this finder
     */
    @Override
    public Class<? extends DatasetVersion> versionClass() {
        return HiveDatasetVersion.class;
    }

    /**
     * Lists every {@link Partition} of the given {@link HiveDataset}'s table and converts each one into a
     * {@link HiveDatasetVersion} through {@link #getDatasetVersion(Partition)}.
     * <p>
     * Best effort: a partition whose conversion throws, or for which the conversion yields <code>null</code>,
     * is left out of the returned collection. Failures are logged at WARN level.
     * </p>
     *
     * @param dataset the dataset to inspect; must be a {@link HiveDataset} backed by a partitioned table
     * @return the versions that could be created, in the order the partitions were listed
     * @throws IllegalArgumentException if <code>dataset</code> is not a {@link HiveDataset}, or if
     * {@link HiveDataset#getTable()} is not partitioned
     */
    @Override
    public Collection<HiveDatasetVersion> findDatasetVersions(Dataset dataset) throws IOException {
        if (!(dataset instanceof HiveDataset)) {
            throw new IllegalArgumentException("HiveDatasetVersionFinder is only compatible with HiveDataset");
        }
        final HiveDataset hiveDataset = (HiveDataset) dataset;

        if (!HiveUtils.isPartitioned(hiveDataset.getTable())) {
            throw new IllegalArgumentException(
                    "HiveDatasetVersionFinder is only compatible with partitioned hive tables");
        }

        // The metastore client stays open for the whole lookup; the result list is fully built before it is closed.
        try (AutoReturnableObject<IMetaStoreClient> metastoreClient = hiveDataset.getClientPool().getClient()) {

            List<Partition> allPartitions = HiveUtils.getPartitions(metastoreClient.get(), hiveDataset.getTable(),
                    Optional.<String>absent());

            List<HiveDatasetVersion> versions = Lists.newArrayList();
            for (Partition partition : allPartitions) {
                HiveDatasetVersion version = versionOrNull(partition);
                if (version != null) {
                    versions.add(version);
                }
            }
            return versions;
        }
    }

    /**
     * Converts a single partition, turning any failure into a logged warning and a <code>null</code> result so
     * that one bad partition does not abort the whole scan.
     */
    private HiveDatasetVersion versionOrNull(Partition partition) {
        try {
            return getDatasetVersion(partition);
        } catch (Throwable e) {
            // Deliberately broad: the contract is to skip any partition that cannot be converted.
            log.warn(String.format("Failed to get DatasetVersion %s. Skipping.",
                    partition.getCompleteName()), e);
            return null;
        }
    }

    /**
     * Create a {@link HiveDatasetVersion} for the {@link Partition}.
     *
     * @param partition for which a {@link HiveDatasetVersion} is created
     * @return the version for <code>partition</code>; a <code>null</code> result causes the partition to be
     * skipped by {@link #findDatasetVersions(Dataset)}
     */
    protected abstract HiveDatasetVersion getDatasetVersion(Partition partition);

}