gobblin.data.management.copy.replication.ReplicationDataValidPathPicker.java Source code

Java tutorial

Introduction

Here is the source code for gobblin.data.management.copy.replication.ReplicationDataValidPathPicker.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package gobblin.data.management.copy.replication;

import java.io.IOException;
import java.lang.reflect.InvocationTargetException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;

import org.apache.commons.lang3.reflect.ConstructorUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

import com.google.common.collect.Ordering;
import com.typesafe.config.Config;

import gobblin.data.management.policy.VersionSelectionPolicy;
import gobblin.data.management.version.FileSystemDatasetVersion;
import gobblin.data.management.version.finder.VersionFinder;
import gobblin.dataset.FileSystemDataset;

/**
 * Used to pick the valid Paths for data replication based on {@link Config}
 * @author mitu
 *
 */
public class ReplicationDataValidPathPicker {

    public static final String POLICY_CLASS = "selection.policy.class";

    public static final String FINDER_CLASS = "version.finder.class";

    @SuppressWarnings("unchecked")
    public static Collection<Path> getValidPaths(HadoopFsEndPoint hadoopFsEndPoint) throws IOException {
        Config selectionConfig = hadoopFsEndPoint.getSelectionConfig();

        FileSystemDataset tmpDataset = new HadoopFsEndPointDataset(hadoopFsEndPoint);
        FileSystem theFs = FileSystem.get(hadoopFsEndPoint.getFsURI(), new Configuration());

        /**
         * Use {@link FileSystemDatasetVersion} as
         * {@link DateTimeDatasetVersionFinder} / {@link GlobModTimeDatasetVersionFinder} use {@link TimestampedDatasetVersion}
         * {@link SingleVersionFinder} uses {@link FileStatusDatasetVersion}
         */
        VersionFinder<FileSystemDatasetVersion> finder;
        try {
            finder = (VersionFinder<FileSystemDatasetVersion>) ConstructorUtils.invokeConstructor(
                    Class.forName(selectionConfig.getString(FINDER_CLASS)), theFs, selectionConfig);
        } catch (NoSuchMethodException | IllegalAccessException | InvocationTargetException | InstantiationException
                | ClassNotFoundException e) {
            throw new IllegalArgumentException(e);
        }

        List<FileSystemDatasetVersion> versions = Ordering.natural().reverse()
                .sortedCopy(finder.findDatasetVersions(tmpDataset));

        VersionSelectionPolicy<FileSystemDatasetVersion> selector;
        try {
            selector = (VersionSelectionPolicy<FileSystemDatasetVersion>) ConstructorUtils
                    .invokeConstructor(Class.forName(selectionConfig.getString(POLICY_CLASS)), selectionConfig);
        } catch (NoSuchMethodException | IllegalAccessException | InvocationTargetException | InstantiationException
                | ClassNotFoundException e) {
            throw new IllegalArgumentException(e);
        }

        Collection<FileSystemDatasetVersion> versionsSelected = selector.listSelectedVersions(versions);

        List<Path> result = new ArrayList<Path>();
        for (FileSystemDatasetVersion t : versionsSelected) {
            // get the first element out
            result.add(t.getPaths().iterator().next());
        }
        return result;
    }
}