com.yata.core.HDFSManager.java Source code

Java tutorial

Introduction

Here is the source code for com.yata.core.HDFSManager.java

Source

/*
 * Copyright 2013 Klarna AB
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.yata.core;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsAction;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.oozie.client.OozieClientException;

import java.io.IOException;
import java.net.ConnectException;
import java.net.URI;
import java.net.URISyntaxException;

/**
 * Copies files inside HDFS on behalf of the user configured in the Oozie properties file.
 *
 * <p>Connection details are read from the {@code HDFS_HOST}, {@code HDFS_PORT} and
 * {@code OOZIE_USER} properties. Progress and failures are reported on standard output, which is
 * the diagnostic channel this class has always used.
 */
public class HDFSManager {

    private static final String CLASS_NAME = HDFSManager.class.getSimpleName();

    /** Error code attached to every {@link OozieClientException} raised by this class. */
    private static final String ERROR_CODE = "ERR_CODE_1218";

    /** How many times a connection to HDFS is attempted before giving up. */
    private static final int MAX_CONNECT_ATTEMPTS = 3;

    /** Pause, in milliseconds, taken before each connection attempt. */
    private static final long CONNECT_DELAY_MILLIS = 300L;

    private final OozieProperties oozieProperties;

    /**
     * Creates a manager backed by the given Oozie properties file.
     *
     * @param ooziePropertiesFile location of the properties file handed to
     *                            {@code OozieProperties.getInstance}
     * @throws OozieClientException declared for whatever {@code OozieProperties.getInstance} raises
     */
    public HDFSManager(String ooziePropertiesFile) throws OozieClientException {

        this.oozieProperties = OozieProperties.getInstance(ooziePropertiesFile);
    }

    /**
     * Reads a mandatory property and fails fast when it is absent or blank, so a missing setting
     * is reported by name instead of surfacing later as a malformed {@code hdfs://null:null} URI.
     *
     * @param key property name
     * @return the non-blank property value
     * @throws IllegalStateException if the property is missing or blank
     */
    private String requireProperty(String key) {

        String value = this.oozieProperties.getProperty(key);
        if (value == null || value.trim().isEmpty()) {
            throw new IllegalStateException(
                    "getHdfsFileSytem@" + CLASS_NAME + " : Missing required property " + key);
        }
        return value;
    }

    /**
     * Opens the HDFS file system at {@code hdfs://HDFS_HOST:HDFS_PORT} as {@code OOZIE_USER}.
     *
     * <p>A {@link ConnectException} triggers a retry, up to {@link #MAX_CONNECT_ATTEMPTS} attempts
     * in total. Any other {@link IOException} is propagated immediately. An interrupt aborts the
     * retries, restores the thread's interrupt flag and is reported as an {@link IOException}.
     *
     * @return a connected file system, never {@code null}
     * @throws IOException           if the connection cannot be established or the thread is
     *                               interrupted while connecting
     * @throws IllegalStateException if a required property is missing or the URI is malformed
     */
    private FileSystem getHdfsFileSytem() throws IOException {

        final String user = requireProperty("OOZIE_USER");

        final URI hdfsURI; // e.g. hdfs://hdtmaster1.lrd.cat.com:8020
        try {
            hdfsURI = new URI("hdfs://" + requireProperty("HDFS_HOST") + ":" + requireProperty("HDFS_PORT"));
        } catch (URISyntaxException e) {
            throw new IllegalStateException("getHdfsFileSytem@" + CLASS_NAME + " : URI Syntax Invalid...", e);
        }

        Configuration conf = new Configuration();
        // The URI has no path component, so getPath() would store an empty default file system.
        conf.set("fs.defaultFS", hdfsURI.toString());
        conf.set("user.name", user);
        conf.set("basedir", "/user/" + user);
        conf.set("hadoop.job.ugi", user);

        IOException lastFailure = null;

        for (int attempt = 1; attempt <= MAX_CONNECT_ATTEMPTS; attempt++) {

            try {

                Thread.sleep(CONNECT_DELAY_MILLIS);
                return FileSystem.get(hdfsURI, conf, user);
            } catch (InterruptedException e) {
                String message = "getHdfsFileSytem@" + CLASS_NAME
                        + " : InterruptedException while getting HDFS...";
                System.out.println(message);
                // Restore the flag so callers further up can observe the interruption.
                Thread.currentThread().interrupt();
                throw new IOException(message, e);
            } catch (ConnectException e) {
                System.out.println("getHdfsFileSytem@" + CLASS_NAME + " : ConnectException while getting HDFS...");
                e.printStackTrace();
                lastFailure = e;
            }

            if (attempt < MAX_CONNECT_ATTEMPTS) {
                System.out.println("getHdfsFileSytem@" + CLASS_NAME + " : RETRY");
            }
        }

        // Never hand back null: callers dereference the result immediately.
        throw new IOException("getHdfsFileSytem@" + CLASS_NAME + " : Unable to connect to " + hdfsURI
                + " after " + MAX_CONNECT_ATTEMPTS + " attempts", lastFailure);
    }

    /**
     * Closes the file system, reporting (but not propagating) a failure so that it cannot mask
     * the outcome of the copy itself.
     *
     * @param hdfs file system to close; {@code null} is ignored
     */
    private static void closeQuietly(FileSystem hdfs) {

        if (hdfs == null) {
            return;
        }
        try {
            hdfs.close();
        } catch (IOException e) {
            System.out.println("copyHDFSData@" + CLASS_NAME + " : IOException while closing HDFS FileSystem...");
            e.printStackTrace();
        }
    }

    /**
     * Copies a file within HDFS, overwriting the target, and then opens up the target's
     * permissions ({@code rwxrwxr-x}).
     *
     * <p>Equivalent in spirit to
     * {@code hadoop fs -cp /projects/ddsw/dev/data/backup/dealer_hierarchy/<<DOMAIN_NAME>>/<<FILE_NAME>>
     * /projects/ddsw/dev/data/raw/nas/<<DOMAIN_NAME>>}.
     *
     * <p>The source is left in place. The file system opened for this call is closed before the
     * method returns.
     *
     * @param hdfsTestDataSourceFile absolute HDFS path of the file to copy
     * @param hdfsTestDataTargetFile absolute HDFS path to copy to
     * @throws OozieClientException with code {@code ERR_CODE_1218} if the file system cannot be
     *                              reached, the copy fails, or the permissions cannot be changed;
     *                              the underlying {@link IOException}, when there is one, is
     *                              attached as the cause
     */
    public void copyHDFSData(String hdfsTestDataSourceFile, String hdfsTestDataTargetFile)
            throws OozieClientException {

        System.out.println("copyHDFSData@" + CLASS_NAME + " : Loading Test Data From :-> " + hdfsTestDataSourceFile
                + " : Into :-> " + hdfsTestDataTargetFile);

        FileSystem hdfs = null;

        try {

            Path hdfsTestDataSource;
            Path hdfsTestDataTarget;

            try {

                hdfs = getHdfsFileSytem();

                System.out.println("copyHDFSData@" + CLASS_NAME + " : HDFS :-> " + hdfs);

                System.out.println(
                        "copyHDFSData@" + CLASS_NAME + " : HDFSHomeDirectory :-> " + hdfs.getHomeDirectory());
                System.out.println("copyHDFSData@" + CLASS_NAME + " : HDFS-URI :-> " + hdfs.getUri());
                System.out.println(
                        "copyHDFSData@" + CLASS_NAME + " : HDFSWorkingDirectory :-> " + hdfs.getWorkingDirectory());
                System.out.println("copyHDFSData@" + CLASS_NAME + " : HDFS : " + hdfs + " : Exists :-> "
                        + hdfs.exists(hdfs.getHomeDirectory()));

                hdfsTestDataSource = new Path(hdfs.getUri().getPath() + hdfsTestDataSourceFile);
                hdfsTestDataTarget = new Path(hdfs.getUri().getPath() + hdfsTestDataTargetFile);

                System.out.println("copyHDFSData@" + CLASS_NAME + " : HDFS TEST DATA : " + hdfsTestDataSource
                        + " : Exists :-> " + hdfs.exists(hdfsTestDataSource));
                System.out.println("copyHDFSData@" + CLASS_NAME + " : HDFS DOMAIN DATA : " + hdfsTestDataTarget
                        + " : Exists :-> " + hdfs.exists(hdfsTestDataTarget));

            } catch (IOException e) {

                e.printStackTrace();
                throw new OozieClientException(ERROR_CODE, "copyHDFSData@" + CLASS_NAME
                        + " : IOException while getting HDFS FileSystem - EXITING...", e);
            }

            try {

                // deleteSource = false, overwrite = true
                boolean copied = FileUtil.copy(hdfs, hdfsTestDataSource, hdfs, hdfsTestDataTarget, false, true,
                        hdfs.getConf());

                if (!copied) {
                    throw new OozieClientException(ERROR_CODE, "copyHDFSData@" + CLASS_NAME
                            + " : HDFS copy reported failure : " + hdfsTestDataSource + " :-> "
                            + hdfsTestDataTarget + " - EXITING...");
                }

                System.out.println("copyHDFSData@" + CLASS_NAME + " : NOW : HDFS TEST DATA : " + hdfsTestDataSource
                        + " : Exists :-> " + hdfs.exists(hdfsTestDataSource));
                System.out.println("copyHDFSData@" + CLASS_NAME + " : HDFS DOMAIN DATA : " + hdfsTestDataTarget
                        + " : Exists :-> " + hdfs.exists(hdfsTestDataTarget));

            } catch (IOException e) {

                e.printStackTrace();
                throw new OozieClientException(ERROR_CODE,
                        "copyHDFSData@" + CLASS_NAME + " : IOException while Copying HDFS Data - EXITING...", e);
            }

            /*
             * IMPORTANT
             * If the source data file on HDFS is not owned by the Hive/Hadoop user, the copied file
             * needs its permissions widened so that user can move/delete it once processed.
             */
            try {

                hdfs.setPermission(hdfsTestDataTarget,
                        new FsPermission(FsAction.ALL, FsAction.ALL, FsAction.READ_EXECUTE));
            } catch (IOException e) {

                e.printStackTrace();
                throw new OozieClientException(ERROR_CODE, "copyHDFSData@" + CLASS_NAME
                        + " : IOException while Changing HDFS File Permissions - EXITING...", e);
            }

        } finally {
            closeQuietly(hdfs);
        }
    }
}