Example usage for org.apache.hadoop.security.UserGroupInformation.loginUserFromKeytab

Introduction

This page collects example usages of org.apache.hadoop.security.UserGroupInformation.loginUserFromKeytab from open-source projects.

Prototype

@InterfaceAudience.Public
@InterfaceStability.Evolving
public static void loginUserFromKeytab(String user, String path) throws IOException 

Document

Log a user in from a keytab file.
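
The following is a minimal, self-contained sketch of a keytab login against a Kerberos-secured cluster. The principal, the keytab path, and the assumption that the loaded configuration enables Kerberos (hadoop.security.authentication=kerberos) are placeholders, not values taken from the examples below.

import java.io.IOException;
import java.security.PrivilegedExceptionAction;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.security.UserGroupInformation;

public class KeytabLoginSketch {
    public static void main(String[] args) throws IOException, InterruptedException {
        // Hypothetical principal and keytab path; replace with real values.
        final String principal = "service/host@EXAMPLE.COM";
        final String keytab = "/etc/security/keytabs/service.keytab";

        // The loaded core-site.xml/hdfs-site.xml are assumed to enable Kerberos
        // (hadoop.security.authentication=kerberos).
        final Configuration conf = new Configuration();
        UserGroupInformation.setConfiguration(conf);
        UserGroupInformation.loginUserFromKeytab(principal, keytab);

        // Perform HDFS access as the freshly logged-in user.
        UserGroupInformation ugi = UserGroupInformation.getLoginUser();
        FileSystem fs = ugi.doAs(new PrivilegedExceptionAction<FileSystem>() {
            public FileSystem run() throws IOException {
                return FileSystem.get(conf);
            }
        });
        System.out.println("Logged in as " + ugi.getUserName()
                + ", home directory = " + fs.getHomeDirectory());
    }
}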

Usage

From source file: ruciotools.WebRucioGrep.java

License: Apache License

/**
 * @see HttpServlet#doGet(HttpServletRequest request, HttpServletResponse response)
 */
protected void doGet(HttpServletRequest request, HttpServletResponse response)
        throws ServletException, IOException {
    final PrintWriter out = response.getWriter();

    Enumeration<String> parameterNames = request.getParameterNames();
    List<String> params = new ArrayList<String>();
    while (parameterNames.hasMoreElements()) {
        String paramName = parameterNames.nextElement();
        for (String v : request.getParameterValues(paramName)) {
            params.add("-" + paramName);
            params.add(v);
        }

    }
    final String[] args = new String[params.size()];
    params.toArray(args);

    FileSystem fs = DistributedFileSystem.get(new Configuration());
    FSDataOutputStream of1 = fs.create(new Path("/user/rucio01/log/test-MR-before.ralph"));
    of1.write(new String("ralph").getBytes());
    of1.close();

    System.out.println("--------------status---:" + UserGroupInformation.isLoginKeytabBased());
    System.out.println("--------------current user---:" + UserGroupInformation.getCurrentUser());
    UserGroupInformation ugi = UserGroupInformation.getCurrentUser();
    boolean isKeyTab = false; //ugi.isFromKeytab();
    if (isKeyTab) {
        ugi.checkTGTAndReloginFromKeytab();
    } else {
        UserGroupInformation.loginUserFromKeytab("rucio01", "/etc/hadoop/conf/rucio01.keytab");
        isKeyTab = UserGroupInformation.isLoginKeytabBased();
        if (isKeyTab) {
            ugi = UserGroupInformation.getCurrentUser();
        }
    }
    System.out.println("---------AFTER LOGIN-----:");
    System.out.println("--------------status---:" + UserGroupInformation.isLoginKeytabBased());
    System.out.println("--------------current user---:" + UserGroupInformation.getCurrentUser());

    //FileSystem fs = DistributedFileSystem.get(new Configuration());
    FSDataOutputStream of = fs.create(new Path("/user/rucio01/log/test-MR-outer.ralph"));
    of.write(new String("ralph").getBytes());
    of.close();

    try {
        ugi.doAs(new PrivilegedExceptionAction<Void>() {
            public Void run() throws Exception {

                FileSystem fs = DistributedFileSystem.get(new Configuration());
                FSDataOutputStream of = fs.create(new Path("/user/rucio01/log/test-MR-inner.ralph"));
                of.write(new String("ralph").getBytes());
                of.close();

                // Verify input parameters
                Map<String, Object> settings = Grep.parseCommandLineArguments(args);
                if ((Boolean) settings.get("printUsage")) {
                    out.println((String) settings.get("errorMessage"));
                    out.println(Grep.printUsage());
                    return null;
                }

                // Derive tmp dir for job output
                settings.put("tempDir",
                        new Path("rucio-grep-" + Integer.toString(new Random().nextInt(Integer.MAX_VALUE))));

                // Execute MR job
                try {
                    if (!Grep.runJob(settings)) {
                        out.println("Something went wrong :-(\n");
                        out.println(
                                "Hints: (1) do not redirect stderr to /dev/null (2)  consider setting -excludeTmpFiles in case of IOExceptions\n");
                    }
                } catch (Exception e) {
                    out.println(e);
                    return null;
                }
                try {
                    out.println(Grep.getResults(settings));
                } catch (Exception e) {
                    out.println("No job output found in " + settings.get("tempDir").toString());
                    out.println(e);
                }
                return null;
            }
        });
    } catch (Exception e) {
        System.out.println(e);
    }
}
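
Note that the example above hard-codes isKeyTab to false (the ugi.isFromKeytab() call is commented out), so it always performs a fresh keytab login. A minimal sketch of the check-and-relogin pattern this code approximates, reusing the principal and keytab path from the example, could look like this:

import java.io.IOException;

import org.apache.hadoop.security.UserGroupInformation;

public class KeytabRelogin {
    static UserGroupInformation ensureKeytabLogin() throws IOException {
        UserGroupInformation ugi = UserGroupInformation.getCurrentUser();
        if (ugi.isFromKeytab()) {
            // Already logged in from a keytab: renew the TGT if it is close to expiring.
            ugi.checkTGTAndReloginFromKeytab();
        } else {
            // No keytab login yet: perform the initial login.
            UserGroupInformation.loginUserFromKeytab("rucio01", "/etc/hadoop/conf/rucio01.keytab");
            ugi = UserGroupInformation.getCurrentUser();
        }
        return ugi;
    }
}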

From source file: skewtune.mapreduce.STJobTracker.java

License: Apache License

@SuppressWarnings("unchecked")
STJobTracker(final JobConf conf, String jobtrackerIdentifier) throws IOException, InterruptedException {
    // find the owner of the process
    // get the desired principal to load
    String keytabFilename = conf.get(JTConfig.JT_KEYTAB_FILE);
    UserGroupInformation.setConfiguration(conf);
    if (keytabFilename != null) {
        String desiredUser = conf.get(JTConfig.JT_USER_NAME, System.getProperty("user.name"));
        UserGroupInformation.loginUserFromKeytab(desiredUser, keytabFilename);
        mrOwner = UserGroupInformation.getLoginUser();
    } else {
        mrOwner = UserGroupInformation.getCurrentUser();
    }

    supergroup = conf.get(MR_SUPERGROUP, "supergroup");
    LOG.info("Starting jobtracker with owner as " + mrOwner.getShortUserName() + " and supergroup as "
            + supergroup);

    long secretKeyInterval = conf.getLong(MRConfig.DELEGATION_KEY_UPDATE_INTERVAL_KEY,
            MRConfig.DELEGATION_KEY_UPDATE_INTERVAL_DEFAULT);
    long tokenMaxLifetime = conf.getLong(MRConfig.DELEGATION_TOKEN_MAX_LIFETIME_KEY,
            MRConfig.DELEGATION_TOKEN_MAX_LIFETIME_DEFAULT);
    long tokenRenewInterval = conf.getLong(MRConfig.DELEGATION_TOKEN_RENEW_INTERVAL_KEY,
            MRConfig.DELEGATION_TOKEN_RENEW_INTERVAL_DEFAULT);
    secretManager = new DelegationTokenSecretManager(secretKeyInterval, tokenMaxLifetime, tokenRenewInterval,
            DELEGATION_TOKEN_GC_INTERVAL);
    secretManager.startThreads();

    //
    // Grab some static constants
    //

    NUM_HEARTBEATS_IN_SECOND = conf.getInt(JT_HEARTBEATS_IN_SECOND, DEFAULT_NUM_HEARTBEATS_IN_SECOND);
    if (NUM_HEARTBEATS_IN_SECOND < MIN_NUM_HEARTBEATS_IN_SECOND) {
        NUM_HEARTBEATS_IN_SECOND = DEFAULT_NUM_HEARTBEATS_IN_SECOND;
    }

    HEARTBEATS_SCALING_FACTOR = conf.getFloat(JT_HEARTBEATS_SCALING_FACTOR, DEFAULT_HEARTBEATS_SCALING_FACTOR);
    if (HEARTBEATS_SCALING_FACTOR < MIN_HEARTBEATS_SCALING_FACTOR) {
        HEARTBEATS_SCALING_FACTOR = DEFAULT_HEARTBEATS_SCALING_FACTOR;
    }

    // whether to dump or not every heartbeat message even when DEBUG is enabled
    dumpHeartbeat = conf.getBoolean(JT_HEARTBEATS_DUMP, false);

    // This is a directory of temporary submission files. We delete it
    // on startup, and can delete any files that we're done with
    this.conf = conf;
    JobConf jobConf = new JobConf(conf);

    // Set ports, start RPC servers, setup security policy etc.
    InetSocketAddress addr = getAddress(conf);
    this.localMachine = addr.getHostName();
    this.port = addr.getPort();

    int handlerCount = conf.getInt(JT_IPC_HANDLER_COUNT, 10);
    this.interTrackerServer = RPC.getServer(SkewTuneClientProtocol.class, this, addr.getHostName(),
            addr.getPort(), handlerCount, false, conf, secretManager);
    if (LOG.isDebugEnabled()) {
        Properties p = System.getProperties();
        for (Iterator it = p.keySet().iterator(); it.hasNext();) {
            String key = (String) it.next();
            String val = p.getProperty(key);
            LOG.debug("Property '" + key + "' is " + val);
        }
    }

    InetSocketAddress infoSocAddr = NetUtils
            .createSocketAddr(conf.get(JT_HTTP_ADDRESS, String.format("%s:0", this.localMachine)));
    String infoBindAddress = infoSocAddr.getHostName();
    int tmpInfoPort = infoSocAddr.getPort();
    this.startTime = System.currentTimeMillis();
    infoServer = new HttpServer("job", infoBindAddress, tmpInfoPort, tmpInfoPort == 0, conf);
    infoServer.setAttribute("job.tracker", this);
    infoServer.addServlet("jobcompletion", "/completion", JobCompletionServlet.class);
    infoServer.addServlet("taskspeculation", "/speculation", SpeculationEventServlet.class);
    infoServer.addServlet("skewreport", "/skew", SkewReportServlet.class);
    infoServer.addServlet("tasksplit", "/split/*", SplitTaskServlet.class);
    infoServer.addServlet("tasksplitV2", "/splitV2/*", SplitTaskV2Servlet.class);
    infoServer.start();

    this.trackerIdentifier = jobtrackerIdentifier;

    // The rpc/web-server ports can be ephemeral ports...
    // ... ensure we have the correct info
    this.port = interTrackerServer.getListenerAddress().getPort();
    this.conf.set(JT_IPC_ADDRESS, (this.localMachine + ":" + this.port));
    LOG.info("JobTracker up at: " + this.port);
    this.infoPort = this.infoServer.getPort();
    this.conf.set(JT_HTTP_ADDRESS, infoBindAddress + ":" + this.infoPort);
    LOG.info("JobTracker webserver: " + this.infoServer.getPort());
    this.defaultNotificationUrl = String.format("http://%s:%d/completion?jobid=$jobId&status=$jobStatus",
            infoBindAddress, this.infoPort);
    LOG.info("JobTracker completion URI: " + defaultNotificationUrl);
    //        this.defaultSpeculationEventUrl = String.format("http://%s:%d/speculation?taskid=$taskId&remainTime=$taskRemainTime",infoBindAddress,this.infoPort);
    this.defaultSpeculationEventUrl = String.format("http://%s:%d/speculation?jobid=$jobId", infoBindAddress,
            this.infoPort);
    LOG.info("JobTracker speculation event URI: " + defaultSpeculationEventUrl);
    this.defaultSkewReportUrl = String.format("http://%s:%d/skew", infoBindAddress, this.infoPort);
    LOG.info("JobTracker skew report event URI: " + defaultSkewReportUrl);
    this.trackerHttp = String.format("http://%s:%d", infoBindAddress, this.infoPort);

    while (!Thread.currentThread().isInterrupted()) {
        try {
            // if we haven't contacted the namenode go ahead and do it
            if (fs == null) {
                fs = mrOwner.doAs(new PrivilegedExceptionAction<FileSystem>() {
                    @Override
                    public FileSystem run() throws IOException {
                        return FileSystem.get(conf);
                    }
                });
            }

            // clean up the system dir, which will only work if hdfs is out
            // of safe mode
            if (systemDir == null) {
                systemDir = new Path(getSystemDir());
            }
            try {
                FileStatus systemDirStatus = fs.getFileStatus(systemDir);
                if (!systemDirStatus.getOwner().equals(mrOwner.getShortUserName())) {
                    throw new AccessControlException(
                            "The systemdir " + systemDir + " is not owned by " + mrOwner.getShortUserName());
                }
                if (!systemDirStatus.getPermission().equals(SYSTEM_DIR_PERMISSION)) {
                    LOG.warn("Incorrect permissions on " + systemDir + ". Setting it to "
                            + SYSTEM_DIR_PERMISSION);
                    fs.setPermission(systemDir, new FsPermission(SYSTEM_DIR_PERMISSION));
                } else {
                    break;
                }
            } catch (FileNotFoundException fnf) {
            } // ignore
        } catch (AccessControlException ace) {
            LOG.warn("Failed to operate on " + JTConfig.JT_SYSTEM_DIR + "(" + systemDir
                    + ") because of permissions.");
            LOG.warn("Manually delete the " + JTConfig.JT_SYSTEM_DIR + "(" + systemDir
                    + ") and then start the JobTracker.");
            LOG.warn("Bailing out ... ");
            throw ace;
        } catch (IOException ie) {
            LOG.info("problem cleaning system directory: " + systemDir, ie);
        }
        Thread.sleep(FS_ACCESS_RETRY_PERIOD);
    }

    if (Thread.currentThread().isInterrupted()) {
        throw new InterruptedException();
    }

    // initialize cluster variable
    cluster = new Cluster(this.conf);

    // now create a job client proxy
    jtClient = (ClientProtocol) RPC.getProxy(ClientProtocol.class, ClientProtocol.versionID,
            JobTracker.getAddress(conf), mrOwner, this.conf,
            NetUtils.getSocketFactory(conf, ClientProtocol.class));

    new SpeculativeScheduler().start();

    // initialize task event fetcher
    new TaskCompletionEventFetcher().start();

    // Same with 'localDir' except it's always on the local disk.
    asyncDiskService = new MRAsyncDiskService(FileSystem.getLocal(conf), conf.getLocalDirs());
    asyncDiskService.moveAndDeleteFromEachVolume(SUBDIR);

    // keep at least one asynchronous worker per CPU core
    int numProcs = Runtime.getRuntime().availableProcessors();
    LOG.info("# of available processors = " + numProcs);
    int maxFactor = conf.getInt(JT_MAX_ASYNC_WORKER_FACTOR, 2);
    asyncWorkers = new ThreadPoolExecutor(numProcs, numProcs * maxFactor, 30, TimeUnit.SECONDS,
            new SynchronousQueue<Runnable>(true), new ThreadPoolExecutor.CallerRunsPolicy());

    speculativeSplit = conf.getBoolean(JT_SPECULATIVE_SPLIT, false);
}

From source file: voldemort.store.readonly.fetcher.HdfsFetcher.java

License: Apache License

public File fetch(String sourceFileUrl, String destinationFile, String hadoopConfigPath) throws IOException {
    if (this.globalThrottleLimit != null) {
        if (this.globalThrottleLimit.getSpeculativeRate() < this.minBytesPerSecond)
            throw new VoldemortException("Too many push jobs.");
        this.globalThrottleLimit.incrementNumJobs();
    }

    ObjectName jmxName = null;
    try {

        final Configuration config = new Configuration();
        FileSystem fs = null;
        config.setInt("io.socket.receive.buffer", bufferSize);
        config.set("hadoop.rpc.socket.factory.class.ClientProtocol", ConfigurableSocketFactory.class.getName());
        config.set("hadoop.security.group.mapping", "org.apache.hadoop.security.ShellBasedUnixGroupsMapping");

        final Path path = new Path(sourceFileUrl);

        boolean isHftpBasedFetch = sourceFileUrl.length() > 4 && sourceFileUrl.substring(0, 4).equals("hftp");
        logger.info("URL : " + sourceFileUrl + " and hftp protocol enabled = " + isHftpBasedFetch);
        logger.info("Hadoop path = " + hadoopConfigPath + " , keytab path = " + HdfsFetcher.keytabPath
                + " , kerberos principal = " + HdfsFetcher.kerberosPrincipal);

        if (hadoopConfigPath.length() > 0 && !isHftpBasedFetch) {

            config.addResource(new Path(hadoopConfigPath + "/core-site.xml"));
            config.addResource(new Path(hadoopConfigPath + "/hdfs-site.xml"));

            String security = config.get(CommonConfigurationKeys.HADOOP_SECURITY_AUTHENTICATION);

            if (security == null || !security.equals("kerberos")) {
                logger.error("Security isn't turned on in the conf: "
                        + CommonConfigurationKeys.HADOOP_SECURITY_AUTHENTICATION + " = "
                        + config.get(CommonConfigurationKeys.HADOOP_SECURITY_AUTHENTICATION));
                logger.error("Please make sure that the Hadoop config directory path is valid.");
                return null;
            } else {
                logger.info("Security is turned on in the conf. Trying to authenticate ...");

            }
        }

        try {

            if (HdfsFetcher.keytabPath.length() > 0 && !isHftpBasedFetch) {

                if (!new File(HdfsFetcher.keytabPath).exists()) {
                    logger.error("Invalid keytab file path. Please provide a valid keytab path");
                    return null;
                }

                // First login using the specified principal and keytab file
                UserGroupInformation.setConfiguration(config);
                UserGroupInformation.loginUserFromKeytab(HdfsFetcher.kerberosPrincipal, HdfsFetcher.keytabPath);

                /*
                 * If login is successful, get the filesystem object. NOTE:
                 * Ideally we do not need a doAs block for this. Consider
                 * removing it in the future once the Hadoop jars have the
                 * corresponding patch (tracked in the Hadoop Apache
                 * project: HDFS-3367)
                 */
                try {
                    logger.info("I've logged in and am now Doasing as "
                            + UserGroupInformation.getCurrentUser().getUserName());
                    fs = UserGroupInformation.getCurrentUser()
                            .doAs(new PrivilegedExceptionAction<FileSystem>() {

                                public FileSystem run() throws Exception {
                                    FileSystem fs = path.getFileSystem(config);
                                    return fs;
                                }
                            });
                } catch (InterruptedException e) {
                    logger.error(e.getMessage());
                } catch (Exception e) {
                    logger.error("Got an exception while getting the filesystem object: ");
                    logger.error("Exception class : " + e.getClass());
                    e.printStackTrace();
                    for (StackTraceElement et : e.getStackTrace()) {
                        logger.error(et.toString());
                    }
                }
            } else {
                fs = path.getFileSystem(config);
            }

        } catch (IOException e) {
            e.printStackTrace();
            logger.error("Error in authenticating or getting the Filesystem object !!!");
            return null;
        }

        CopyStats stats = new CopyStats(sourceFileUrl, sizeOfPath(fs, path));
        jmxName = JmxUtils.registerMbean("hdfs-copy-" + copyCount.getAndIncrement(), stats);
        File destination = new File(destinationFile);

        if (destination.exists()) {
            throw new VoldemortException(
                    "Version directory " + destination.getAbsolutePath() + " already exists");
        }

        logger.info("Starting fetch for : " + sourceFileUrl);
        boolean result = fetch(fs, path, destination, stats);
        logger.info("Completed fetch : " + sourceFileUrl);

        // Close the filesystem
        fs.close();

        if (result) {
            return destination;
        } else {
            return null;
        }
    } catch (IOException e) {
        logger.error("Error while getting Hadoop filesystem : " + e);
        return null;
    } finally {
        if (this.globalThrottleLimit != null) {
            this.globalThrottleLimit.decrementNumJobs();
        }
        if (jmxName != null)
            JmxUtils.unregisterMbean(jmxName);
    }
}

From source file: voldemort.store.readonly.fetcher.HdfsFetcher.java

License: Apache License

public static void main(String[] args) throws Exception {
    if (args.length < 1)
        Utils.croak("USAGE: java " + HdfsFetcher.class.getName()
                + " url [keytab location] [kerberos username] [hadoop-config-path]");
    String url = args[0];

    String keytabLocation = "";
    String kerberosUser = "";
    String hadoopPath = "";
    if (args.length == 4) {
        keytabLocation = args[1];
        kerberosUser = args[2];
        hadoopPath = args[3];
    }

    long maxBytesPerSec = 1024 * 1024 * 1024;
    Path p = new Path(url);

    final Configuration config = new Configuration();
    final URI uri = new URI(url);
    config.setInt("io.file.buffer.size", VoldemortConfig.DEFAULT_BUFFER_SIZE);
    config.set("hadoop.rpc.socket.factory.class.ClientProtocol", ConfigurableSocketFactory.class.getName());
    config.setInt("io.socket.receive.buffer", 1 * 1024 * 1024 - 10000);

    FileSystem fs = null;
    p = new Path(url);
    HdfsFetcher.keytabPath = keytabLocation;
    HdfsFetcher.kerberosPrincipal = kerberosUser;

    boolean isHftpBasedFetch = url.length() > 4 && url.substring(0, 4).equals("hftp");
    logger.info("URL : " + url + " and hftp protocol enabled = " + isHftpBasedFetch);

    if (hadoopPath.length() > 0 && !isHftpBasedFetch) {
        config.set("hadoop.security.group.mapping", "org.apache.hadoop.security.ShellBasedUnixGroupsMapping");

        config.addResource(new Path(hadoopPath + "/core-site.xml"));
        config.addResource(new Path(hadoopPath + "/hdfs-site.xml"));

        String security = config.get(CommonConfigurationKeys.HADOOP_SECURITY_AUTHENTICATION);

        if (security == null || !security.equals("kerberos")) {
            logger.info("Security isn't turned on in the conf: "
                    + CommonConfigurationKeys.HADOOP_SECURITY_AUTHENTICATION + " = "
                    + config.get(CommonConfigurationKeys.HADOOP_SECURITY_AUTHENTICATION));
            logger.info("Fix that.  Exiting.");
            return;
        } else {
            logger.info("Security is turned on in the conf. Trying to authenticate ...");
        }
    }

    try {

        // Get the filesystem object
        if (keytabLocation.length() > 0 && !isHftpBasedFetch) {
            UserGroupInformation.setConfiguration(config);
            UserGroupInformation.loginUserFromKeytab(kerberosUser, keytabLocation);

            final Path path = p;
            try {
                logger.debug("I've logged in and am now Doasing as "
                        + UserGroupInformation.getCurrentUser().getUserName());
                fs = UserGroupInformation.getCurrentUser().doAs(new PrivilegedExceptionAction<FileSystem>() {

                    public FileSystem run() throws Exception {
                        FileSystem fs = path.getFileSystem(config);
                        return fs;
                    }
                });
            } catch (InterruptedException e) {
                logger.error(e.getMessage());
            } catch (Exception e) {
                logger.error("Got an exception while getting the filesystem object: ");
                logger.error("Exception class : " + e.getClass());
                e.printStackTrace();
                for (StackTraceElement et : e.getStackTrace()) {
                    logger.error(et.toString());
                }
            }
        } else {
            fs = p.getFileSystem(config);
        }

    } catch (IOException e) {
        e.printStackTrace();
        System.err.println("Error in getting Hadoop filesystem object !!! Exiting !!!");
        System.exit(-1);
    }

    FileStatus status = fs.listStatus(p)[0];
    long size = status.getLen();
    HdfsFetcher fetcher = new HdfsFetcher(null, maxBytesPerSec, VoldemortConfig.REPORTING_INTERVAL_BYTES,
            VoldemortConfig.DEFAULT_BUFFER_SIZE, 0, keytabLocation, kerberosUser);
    long start = System.currentTimeMillis();

    File location = fetcher.fetch(url, System.getProperty("java.io.tmpdir") + File.separator + start,
            hadoopPath);

    double rate = size * Time.MS_PER_SECOND / (double) (System.currentTimeMillis() - start);
    NumberFormat nf = NumberFormat.getInstance();
    nf.setMaximumFractionDigits(2);
    System.out.println(
            "Fetch to " + location + " completed: " + nf.format(rate / (1024.0 * 1024.0)) + " MB/sec.");
    fs.close();
}

From source file: wherehows.SchemaFetch.java

License: Open Source License

public SchemaFetch(Configuration conf) throws IOException, InterruptedException {
    logger = LoggerFactory.getLogger(getClass());
    this.conf = conf;

    schemaFileWriter = new FileWriter(this.conf.get(Constant.HDFS_SCHEMA_REMOTE_PATH_KEY));
    sampleFileWriter = new FileWriter(this.conf.get(Constant.HDFS_SAMPLE_REMOTE_PATH_KEY));

    // login from kerberos, get the file system
    String principal = this.conf.get(Constant.HDFS_REMOTE_USER_KEY);
    String keyLocation = this.conf.get(Constant.HDFS_REMOTE_KEYTAB_LOCATION_KEY, null);

    if (keyLocation == null) {
        System.out.println("No keytab file location specified, will ignore the kerberos login process");
        fs = FileSystem.get(new Configuration());
    } else {
        try {
            Configuration hdfs_conf = new Configuration();
            hdfs_conf.set("hadoop.security.authentication", "Kerberos");
            hdfs_conf.set("dfs.namenode.kerberos.principal.pattern", "*");
            UserGroupInformation.setConfiguration(hdfs_conf);
            UserGroupInformation.loginUserFromKeytab(principal, keyLocation);
            fs = FileSystem.get(hdfs_conf);
        } catch (IOException e) {
            System.out.println("Failed, Try to login through kerberos. Priciple: " + principal
                    + " keytab location : " + keyLocation);
            e.printStackTrace();
            System.out.println("Use default, assume no kerbero needed");
            fs = FileSystem.get(new Configuration());
        }
    }

    // TODO Write to hdfs
    // String sampleDataFolder = "/projects/wherehows/hdfs/sample_data";
    // String cluster = this.conf.get("hdfs.cluster");
    // sampleDataAvroWriter = new AvroWriter(this.fs, sampleDataFolder + "/" + cluster, SampleDataRecord.class);
    // String schemaFolder = this.conf.get("hdfs.schema_location");

    fileAnalyzerFactory = new FileAnalyzerFactory(this.fs);
}