com.teradata.tempto.internal.hadoop.hdfs.DefaultHdfsDataSourceWriter.java Source code


Introduction

Here is the source code for com.teradata.tempto.internal.hadoop.hdfs.DefaultHdfsDataSourceWriter.java. The class copies the files of a DataSource into a directory on HDFS and records a revision marker for that directory, so unchanged data is not regenerated on subsequent runs.

Source

/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.teradata.tempto.internal.hadoop.hdfs;

import com.google.common.base.Stopwatch;
import com.teradata.tempto.fulfillment.hive.DataSource;
import com.teradata.tempto.hadoop.hdfs.HdfsClient;
import com.teradata.tempto.hadoop.hdfs.HdfsClient.RepeatableContentProducer;
import com.teradata.tempto.internal.hadoop.hdfs.revisions.RevisionStorage;
import org.slf4j.Logger;

import javax.inject.Inject;
import javax.inject.Named;

import java.util.Optional;
import java.util.concurrent.TimeUnit;

import static org.slf4j.LoggerFactory.getLogger;

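/**
 * Writes {@link DataSource} files to HDFS and tracks a per-directory revision
 * marker via {@link RevisionStorage}, so data that is already up to date is not
 * regenerated.
 */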
public class DefaultHdfsDataSourceWriter implements HdfsDataSourceWriter {

    private static final Logger LOGGER = getLogger(DefaultHdfsDataSourceWriter.class);

    private final HdfsClient hdfsClient;
    private final String hdfsUsername;
    private final RevisionStorage revisionStorage;

    @Inject
    public DefaultHdfsDataSourceWriter(HdfsClient hdfsClient, RevisionStorage revisionStorage,
            @Named("hdfs.username") String hdfsUsername) {
        this.hdfsClient = hdfsClient;
        this.hdfsUsername = hdfsUsername;
        this.revisionStorage = revisionStorage;
    }

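    /**
     * Populates {@code dataSourcePath} with the files produced by {@code dataSource}.
     * If the stored revision marker already matches the data source's marker, the
     * existing data is left untouched; otherwise the directory is deleted, recreated
     * and rewritten, and the new marker is stored.
     */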
    @Override
    public void ensureDataOnHdfs(String dataSourcePath, DataSource dataSource) {
        if (isDataUpToDate(dataSourcePath, dataSource)) {
            return;
        }

        revisionStorage.remove(dataSourcePath);
        hdfsClient.delete(dataSourcePath, hdfsUsername);
        hdfsClient.createDirectory(dataSourcePath, hdfsUsername);
        storeTableFiles(dataSourcePath, dataSource);
        revisionStorage.put(dataSourcePath, dataSource.revisionMarker());
    }

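    // Compares the revision marker stored for the path with the data source's current marker.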
    private boolean isDataUpToDate(String dataSourcePath, DataSource dataSource) {
        Stopwatch stopwatch = Stopwatch.createStarted();
        Optional<String> storedRevisionMarker = revisionStorage.get(dataSourcePath);
        LOGGER.debug("revisionMarker.get(\"{}\") took {}ms", dataSourcePath,
                stopwatch.elapsed(TimeUnit.MILLISECONDS));
        if (storedRevisionMarker.isPresent()) {
            if (storedRevisionMarker.get().equals(dataSource.revisionMarker())) {
                LOGGER.debug("Directory {} ({}) already exists, skipping generation of data", dataSourcePath,
                        storedRevisionMarker.get());
                return true;
            } else {
                LOGGER.info(
                        "Directory {} ({}) already exists, but has different revision marker than expected: {}, so data will be regenerated",
                        dataSourcePath, storedRevisionMarker.get(), dataSource.revisionMarker());
            }
        }
        return false;
    }

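    // Saves each content producer under <dataSourcePath>/data_<index>.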
    private void storeTableFiles(String dataSourcePath, DataSource dataSource) {
        int fileIndex = 0;
        for (RepeatableContentProducer fileContent : dataSource.data()) {
            String filePath = dataSourcePath + "/data_" + fileIndex;
            LOGGER.debug("Saving new file {} ({})", filePath, dataSource.revisionMarker());
            hdfsClient.saveFile(filePath, hdfsUsername, fileContent);
            fileIndex++;
        }
    }
}
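
Example usage

Below is a minimal, untested sketch of how this writer might be driven directly, outside of the Guice injection a real Tempto run would use. It assumes that DataSource only requires the two methods exercised in this file (revisionMarker() and data(), the latter returning a Collection of content producers) and that RepeatableContentProducer is a single-method interface supplying an InputStream; the actual Tempto interfaces may declare additional members, so adjust the stub accordingly.

package com.example;

import com.teradata.tempto.fulfillment.hive.DataSource;
import com.teradata.tempto.hadoop.hdfs.HdfsClient;
import com.teradata.tempto.hadoop.hdfs.HdfsClient.RepeatableContentProducer;
import com.teradata.tempto.internal.hadoop.hdfs.DefaultHdfsDataSourceWriter;
import com.teradata.tempto.internal.hadoop.hdfs.revisions.RevisionStorage;

import java.io.ByteArrayInputStream;
import java.nio.charset.StandardCharsets;
import java.util.Collection;

import static java.util.Collections.singletonList;

public class ExampleHdfsDataSourceWrite {

    // hdfsClient and revisionStorage are assumed to be provided by the
    // surrounding test framework (normally injected by Guice in a Tempto run).
    public static void writeExampleData(HdfsClient hdfsClient, RevisionStorage revisionStorage) {
        DefaultHdfsDataSourceWriter writer =
                new DefaultHdfsDataSourceWriter(hdfsClient, revisionStorage, "hdfs");

        // Stub DataSource covering only the two methods the writer calls;
        // method names and return types are assumed from this file and may
        // not match the full interface.
        DataSource dataSource = new DataSource() {
            @Override
            public String revisionMarker() {
                return "v1";
            }

            @Override
            public Collection<RepeatableContentProducer> data() {
                // Assumes RepeatableContentProducer is a functional interface
                // that supplies a fresh InputStream on each call.
                RepeatableContentProducer rows =
                        () -> new ByteArrayInputStream("1|alice\n2|bob\n".getBytes(StandardCharsets.UTF_8));
                return singletonList(rows);
            }
        };

        // Files are written as /tempto/example_table/data_0, data_1, ...;
        // calling this again with the same revision marker is a no-op.
        writer.ensureDataOnHdfs("/tempto/example_table", dataSource);
    }
}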