Example usage for org.apache.hadoop.fs FileSystem setPermission

List of usage examples for org.apache.hadoop.fs FileSystem setPermission

Introduction

On this page you can find example usages of org.apache.hadoop.fs.FileSystem.setPermission.

Prototype

public void setPermission(Path p, FsPermission permission) throws IOException 

Source Link

Document

Set permission of a path.

Usage

From source file:org.apache.flink.yarn.FlinkYarnClientBase.java

License:Apache License

/**
 * This method will block until the ApplicationMaster/JobManager have been
 * deployed on YARN.
 */
protected AbstractFlinkYarnCluster deployInternal() throws Exception {
    // Overview of the steps below: validate the request against the YARN cluster (queue, memory),
    // build the ApplicationMaster launch context, upload jar/config/ship files, submit the
    // application and poll once per second until YARN reports it as RUNNING.
    isReadyForDeployment();

    LOG.info("Using values:");
    LOG.info("\tTaskManager count = {}", taskManagerCount);
    LOG.info("\tJobManager memory = {}", jobManagerMemoryMb);
    LOG.info("\tTaskManager memory = {}", taskManagerMemoryMb);

    // Create application via yarnClient
    yarnApplication = yarnClient.createApplication();
    GetNewApplicationResponse appResponse = yarnApplication.getNewApplicationResponse();

    // ------------------ Add dynamic properties to local flinkConfiguration ------

    Map<String, String> dynProperties = CliFrontend.getDynamicProperties(dynamicPropertiesEncoded);
    for (Map.Entry<String, String> dynProperty : dynProperties.entrySet()) {
        flinkConfiguration.setString(dynProperty.getKey(), dynProperty.getValue());
    }

    try {
        org.apache.flink.core.fs.FileSystem.setDefaultScheme(flinkConfiguration);
    } catch (IOException e) {
        // rethrown with a more descriptive message; the original exception is kept as the cause
        throw new IOException("Error while setting the default " + "filesystem scheme from configuration.", e);
    }
    // ------------------ Check if the specified queue exists --------------

    // The queue check is advisory only: a missing queue or a failure to list queues is logged
    // but never aborts the deployment.
    try {
        List<QueueInfo> queues = yarnClient.getAllQueues();
        if (queues.size() > 0 && this.yarnQueue != null) { // check only if there are queues configured in yarn and for this session.
            boolean queueFound = false;
            for (QueueInfo queue : queues) {
                if (queue.getQueueName().equals(this.yarnQueue)) {
                    queueFound = true;
                    break;
                }
            }
            if (!queueFound) {
                String queueNames = "";
                for (QueueInfo queue : queues) {
                    queueNames += queue.getQueueName() + ", ";
                }
                LOG.warn("The specified queue '" + this.yarnQueue + "' does not exist. " + "Available queues: "
                        + queueNames);
            }
        } else {
            // NOTE(review): this branch is also taken when queues exist but no queue was requested
            // for this session, in which case the message below is misleading.
            LOG.debug("The YARN cluster does not have any queues configured");
        }
    } catch (Throwable e) {
        LOG.warn("Error while getting queue information from YARN: " + e.getMessage());
        if (LOG.isDebugEnabled()) {
            LOG.debug("Error details", e);
        }
    }

    // ------------------ Check if the YARN Cluster has the requested resources --------------

    // the yarnMinAllocationMB specifies the smallest possible container allocation size.
    // all allocations below this value are automatically set to this value.
    final int yarnMinAllocationMB = conf.getInt("yarn.scheduler.minimum-allocation-mb", 0);
    if (jobManagerMemoryMb < yarnMinAllocationMB || taskManagerMemoryMb < yarnMinAllocationMB) {
        LOG.warn("The JobManager or TaskManager memory is below the smallest possible YARN Container size. "
                + "The value of 'yarn.scheduler.minimum-allocation-mb' is '" + yarnMinAllocationMB
                + "'. Please increase the memory size."
                + "YARN will allocate the smaller containers but the scheduler will account for the minimum-allocation-mb, maybe not all instances "
                + "you requested will start.");
    }

    // set the memory to minAllocationMB to do the next checks correctly
    if (jobManagerMemoryMb < yarnMinAllocationMB) {
        jobManagerMemoryMb = yarnMinAllocationMB;
    }
    if (taskManagerMemoryMb < yarnMinAllocationMB) {
        taskManagerMemoryMb = yarnMinAllocationMB;
    }

    // Requests above the per-container maximum are the only resource checks that abort the
    // deployment; all later resource checks merely log warnings.
    Resource maxRes = appResponse.getMaximumResourceCapability();
    final String NOTE = "Please check the 'yarn.scheduler.maximum-allocation-mb' and the 'yarn.nodemanager.resource.memory-mb' configuration values\n";
    if (jobManagerMemoryMb > maxRes.getMemory()) {
        failSessionDuringDeployment();
        throw new YarnDeploymentException(
                "The cluster does not have the requested resources for the JobManager available!\n"
                        + "Maximum Memory: " + maxRes.getMemory() + "MB Requested: " + jobManagerMemoryMb
                        + "MB. " + NOTE);
    }

    if (taskManagerMemoryMb > maxRes.getMemory()) {
        failSessionDuringDeployment();
        throw new YarnDeploymentException(
                "The cluster does not have the requested resources for the TaskManagers available!\n"
                        + "Maximum Memory: " + maxRes.getMemory() + " Requested: " + taskManagerMemoryMb
                        + "MB. " + NOTE);
    }

    final String NOTE_RSC = "\nThe Flink YARN client will try to allocate the YARN session, but maybe not all TaskManagers are "
            + "connecting from the beginning because the resources are currently not available in the cluster. "
            + "The allocation might take more time than usual because the Flink YARN client needs to wait until "
            + "the resources become available.";
    // total = one JobManager container plus taskManagerCount TaskManager containers
    int totalMemoryRequired = jobManagerMemoryMb + taskManagerMemoryMb * taskManagerCount;
    ClusterResourceDescription freeClusterMem = getCurrentFreeClusterResources(yarnClient);
    if (freeClusterMem.totalFreeMemory < totalMemoryRequired) {
        LOG.warn("This YARN session requires " + totalMemoryRequired + "MB of memory in the cluster. "
                + "There are currently only " + freeClusterMem.totalFreeMemory + "MB available." + NOTE_RSC);

    }
    if (taskManagerMemoryMb > freeClusterMem.containerLimit) {
        LOG.warn("The requested amount of memory for the TaskManagers (" + taskManagerMemoryMb
                + "MB) is more than " + "the largest possible YARN container: " + freeClusterMem.containerLimit
                + NOTE_RSC);
    }
    if (jobManagerMemoryMb > freeClusterMem.containerLimit) {
        LOG.warn(
                "The requested amount of memory for the JobManager (" + jobManagerMemoryMb + "MB) is more than "
                        + "the largest possible YARN container: " + freeClusterMem.containerLimit + NOTE_RSC);
    }

    // ----------------- check if the requested containers fit into the cluster.

    // Work on a copy so that the original per-NodeManager numbers stay available for the log
    // messages below (presumably allocateResource updates the array it is given - TODO confirm).
    int[] nmFree = Arrays.copyOf(freeClusterMem.nodeManagersFree, freeClusterMem.nodeManagersFree.length);
    // first, allocate the jobManager somewhere.
    if (!allocateResource(nmFree, jobManagerMemoryMb)) {
        LOG.warn("Unable to find a NodeManager that can fit the JobManager/Application master. "
                + "The JobManager requires " + jobManagerMemoryMb + "MB. NodeManagers available: "
                + Arrays.toString(freeClusterMem.nodeManagersFree) + NOTE_RSC);
    }
    // allocate TaskManagers
    for (int i = 0; i < taskManagerCount; i++) {
        if (!allocateResource(nmFree, taskManagerMemoryMb)) {
            LOG.warn("There is not enough memory available in the YARN cluster. "
                    + "The TaskManager(s) require " + taskManagerMemoryMb + "MB each. "
                    + "NodeManagers available: " + Arrays.toString(freeClusterMem.nodeManagersFree) + "\n"
                    + "After allocating the JobManager (" + jobManagerMemoryMb + "MB) and (" + i + "/"
                    + taskManagerCount + ") TaskManagers, " + "the following NodeManagers are available: "
                    + Arrays.toString(nmFree) + NOTE_RSC);
        }
    }

    // ------------------ Prepare Application Master Container  ------------------------------

    // respect custom JVM options in the YAML file
    final String javaOpts = flinkConfiguration.getString(ConfigConstants.FLINK_JVM_OPTIONS, "");

    // Logging configuration files found in the configuration directory are shipped to the
    // cluster and referenced from the ApplicationMaster command line further down.
    String logbackFile = configurationDirectory + File.separator + FlinkYarnSessionCli.CONFIG_FILE_LOGBACK_NAME;
    boolean hasLogback = new File(logbackFile).exists();
    String log4jFile = configurationDirectory + File.separator + FlinkYarnSessionCli.CONFIG_FILE_LOG4J_NAME;

    boolean hasLog4j = new File(log4jFile).exists();
    if (hasLogback) {
        shipFiles.add(new File(logbackFile));
    }
    if (hasLog4j) {
        shipFiles.add(new File(log4jFile));
    }

    // Set up the container launch context for the application master
    ContainerLaunchContext amContainer = Records.newRecord(ContainerLaunchContext.class);

    String amCommand = "$JAVA_HOME/bin/java" + " -Xmx"
            + Utils.calculateHeapSize(jobManagerMemoryMb, flinkConfiguration) + "M " + javaOpts;

    if (hasLogback || hasLog4j) {
        amCommand += " -Dlog.file=\"" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/jobmanager.log\"";

        if (hasLogback) {
            amCommand += " -Dlogback.configurationFile=file:" + FlinkYarnSessionCli.CONFIG_FILE_LOGBACK_NAME;
        }

        if (hasLog4j) {
            amCommand += " -Dlog4j.configuration=file:" + FlinkYarnSessionCli.CONFIG_FILE_LOG4J_NAME;
        }
    }

    // stdout and stderr of the ApplicationMaster are redirected into the container log directory
    amCommand += " " + getApplicationMasterClass().getName() + " " + " 1>"
            + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/jobmanager.out" + " 2>"
            + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/jobmanager.err";
    amContainer.setCommands(Collections.singletonList(amCommand));

    LOG.debug("Application Master start command: " + amCommand);

    // initialize HDFS
    // Copy the application master jar to the filesystem
    // Create a local resource to point to the destination jar path
    final FileSystem fs = FileSystem.get(conf);

    // hard coded check for the GoogleHDFS client because it is not overriding the getScheme() method.
    if (!fs.getClass().getSimpleName().equals("GoogleHadoopFileSystem") && fs.getScheme().startsWith("file")) {
        LOG.warn("The file system scheme is '" + fs.getScheme() + "'. This indicates that the "
                + "specified Hadoop configuration path is wrong and the system is using the default Hadoop configuration values."
                + "The Flink YARN client needs to store its files in a distributed file system");
    }

    // Set-up ApplicationSubmissionContext for the application
    ApplicationSubmissionContext appContext = yarnApplication.getApplicationSubmissionContext();

    if (RecoveryMode.isHighAvailabilityModeActivated(flinkConfiguration)) {
        // activate re-execution of failed applications
        appContext.setMaxAppAttempts(flinkConfiguration.getInteger(ConfigConstants.YARN_APPLICATION_ATTEMPTS,
                YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS));

        activateHighAvailabilitySupport(appContext);
    } else {
        // set number of application retries to 1 in the default case
        appContext
                .setMaxAppAttempts(flinkConfiguration.getInteger(ConfigConstants.YARN_APPLICATION_ATTEMPTS, 1));
    }

    final ApplicationId appId = appContext.getApplicationId();

    // Setup jar for ApplicationMaster
    LocalResource appMasterJar = Records.newRecord(LocalResource.class);
    LocalResource flinkConf = Records.newRecord(LocalResource.class);
    Path remotePathJar = Utils.setupLocalResource(fs, appId.toString(), flinkJarPath, appMasterJar,
            fs.getHomeDirectory());
    Path remotePathConf = Utils.setupLocalResource(fs, appId.toString(), flinkConfigurationPath, flinkConf,
            fs.getHomeDirectory());
    Map<String, LocalResource> localResources = new HashMap<>(2);
    localResources.put("flink.jar", appMasterJar);
    localResources.put("flink-conf.yaml", flinkConf);

    // setup security tokens (code from apache storm)
    // paths[0] and paths[1] are reserved for the Flink jar and the configuration file (assigned
    // after the loop); the ship files occupy the slots from index 2 onwards.
    final Path[] paths = new Path[2 + shipFiles.size()];
    StringBuilder envShipFileList = new StringBuilder();
    // upload ship files
    for (int i = 0; i < shipFiles.size(); i++) {
        File shipFile = shipFiles.get(i);
        LocalResource shipResources = Records.newRecord(LocalResource.class);
        Path shipLocalPath = new Path("file://" + shipFile.getAbsolutePath());
        paths[2 + i] = Utils.setupLocalResource(fs, appId.toString(), shipLocalPath, shipResources,
                fs.getHomeDirectory());
        localResources.put(shipFile.getName(), shipResources);

        // comma-separated list of the remote ship file paths, without a trailing comma
        envShipFileList.append(paths[2 + i]);
        if (i + 1 < shipFiles.size()) {
            envShipFileList.append(',');
        }
    }

    paths[0] = remotePathJar;
    paths[1] = remotePathConf;
    sessionFilesDir = new Path(fs.getHomeDirectory(), ".flink/" + appId.toString() + "/");

    // Restrict the session directory to its owner (all rights for the user, none for group and
    // others). Note that this is applied only after the files have been uploaded.
    FsPermission permission = new FsPermission(FsAction.ALL, FsAction.NONE, FsAction.NONE);
    fs.setPermission(sessionFilesDir, permission); // set permission for path.

    Utils.setTokensFor(amContainer, paths, conf);

    amContainer.setLocalResources(localResources);
    // NOTE(review): fs is closed here, yet fs.getHomeDirectory() is still called further down
    // when the ApplicationMaster environment is assembled - confirm this is safe for every
    // FileSystem implementation in use.
    fs.close();

    // Setup CLASSPATH for ApplicationMaster
    Map<String, String> appMasterEnv = new HashMap<>();
    // set user specified app master environment variables
    appMasterEnv.putAll(Utils.getEnvironmentVariables(ConfigConstants.YARN_APPLICATION_MASTER_ENV_PREFIX,
            flinkConfiguration));
    // set classpath from YARN configuration
    Utils.setupEnv(conf, appMasterEnv);
    // set Flink on YARN internal configuration values
    appMasterEnv.put(YarnConfigKeys.ENV_TM_COUNT, String.valueOf(taskManagerCount));
    appMasterEnv.put(YarnConfigKeys.ENV_TM_MEMORY, String.valueOf(taskManagerMemoryMb));
    appMasterEnv.put(YarnConfigKeys.FLINK_JAR_PATH, remotePathJar.toString());
    appMasterEnv.put(YarnConfigKeys.ENV_APP_ID, appId.toString());
    appMasterEnv.put(YarnConfigKeys.ENV_CLIENT_HOME_DIR, fs.getHomeDirectory().toString());
    appMasterEnv.put(YarnConfigKeys.ENV_CLIENT_SHIP_FILES, envShipFileList.toString());
    appMasterEnv.put(YarnConfigKeys.ENV_CLIENT_USERNAME,
            UserGroupInformation.getCurrentUser().getShortUserName());
    appMasterEnv.put(YarnConfigKeys.ENV_SLOTS, String.valueOf(slots));
    appMasterEnv.put(YarnConfigKeys.ENV_DETACHED, String.valueOf(detached));

    if (dynamicPropertiesEncoded != null) {
        appMasterEnv.put(YarnConfigKeys.ENV_DYNAMIC_PROPERTIES, dynamicPropertiesEncoded);
    }

    amContainer.setEnvironment(appMasterEnv);

    // Set up resource type requirements for ApplicationMaster
    Resource capability = Records.newRecord(Resource.class);
    capability.setMemory(jobManagerMemoryMb);
    capability.setVirtualCores(1);

    // A custom name, when given, is used verbatim; otherwise a descriptive default is generated.
    String name;
    if (customName == null) {
        name = "Flink session with " + taskManagerCount + " TaskManagers";
        if (detached) {
            name += " (detached)";
        }
    } else {
        name = customName;
    }

    appContext.setApplicationName(name); // application name
    appContext.setApplicationType("Apache Flink");
    appContext.setAMContainerSpec(amContainer);
    appContext.setResource(capability);
    if (yarnQueue != null) {
        appContext.setQueue(yarnQueue);
    }

    // add a hook to clean up in case deployment fails
    Runtime.getRuntime().addShutdownHook(deploymentFailureHook);
    LOG.info("Submitting application master " + appId);
    yarnClient.submitApplication(appContext);

    LOG.info("Waiting for the cluster to be allocated");
    // waittime counts the milliseconds slept so far (incremented by 1000 per poll)
    int waittime = 0;
    loop: while (true) {
        ApplicationReport report;
        try {
            report = yarnClient.getApplicationReport(appId);
        } catch (IOException e) {
            // NOTE(review): only the message of the IOException is kept; its stack trace is lost.
            throw new YarnDeploymentException("Failed to deploy the cluster: " + e.getMessage());
        }
        YarnApplicationState appState = report.getYarnApplicationState();
        switch (appState) {
        case FAILED:
        case FINISHED:
        case KILLED:
            throw new YarnDeploymentException("The YARN application unexpectedly switched to state " + appState
                    + " during deployment. \n" + "Diagnostics from YARN: " + report.getDiagnostics() + "\n"
                    + "If log aggregation is enabled on your cluster, use this command to further investigate the issue:\n"
                    + "yarn logs -applicationId " + appId);
            // no break needed: the throw above always leaves the switch
        case RUNNING:
            LOG.info("YARN application has been deployed successfully.");
            break loop;
        default:
            LOG.info("Deploying cluster, current state " + appState);
            // once the threshold is exceeded this hint is logged on every further poll
            if (waittime > 60000) {
                LOG.info(
                        "Deployment took more than 60 seconds. Please check if the requested resources are available in the YARN cluster");
            }

        }
        waittime += 1000;
        Thread.sleep(1000);
    }
    // print the application id for user to cancel themselves.
    if (isDetached()) {
        LOG.info("The Flink YARN client has been started in detached mode. In order to stop "
                + "Flink on YARN, use the following command or a YARN web interface to stop "
                + "it:\nyarn application -kill " + appId + "\nPlease also note that the "
                + "temporary files of the YARN session in the home directoy will not be removed.");
    }
    // since deployment was successful, remove the hook
    try {
        Runtime.getRuntime().removeShutdownHook(deploymentFailureHook);
    } catch (IllegalStateException e) {
        // we're already in the shut down hook.
    }
    // the Flink cluster is deployed in YARN. Represent cluster
    return new FlinkYarnCluster(yarnClient, appId, conf, flinkConfiguration, sessionFilesDir, detached);
}

From source file:org.apache.gobblin.data.management.conversion.hive.task.HiveConverterUtils.java

License:Apache License

/**
 * Creates a staging directory that mirrors the permission of the source data directory.
 *
 * <p>If the table of {@code conversionEntity} has a data location, its permission and group are
 * read from the filesystem; otherwise {@link FsPermission#getDefault()} is used and no group is
 * applied. The group is only set when the skip-setgroup property of {@code workUnit} is not
 * enabled. An {@link IOException} raised by the filesystem is rethrown unchecked.
 *
 * @param fs filesystem object
 * @param destination staging directory location
 * @param conversionEntity conversion entity used to get source directory permissions
 * @param workUnit workunit
 */
public static void createStagingDirectory(FileSystem fs, String destination,
        HiveProcessingEntity conversionEntity, WorkUnitState workUnit) {
    /*
     * Hive can create missing directories on its own, but it does not seem to create them with
     * the desired permissions. According to the Hive docs the permissions of newly created
     * directories/files can be controlled through the umask, e.g.
     *
     * SET hive.warehouse.subdir.inherit.perms=false;
     * SET fs.permissions.umask-mode=022;
     *
     * Upon testing this did not work, hence the explicit handling here.
     */
    final Path stagingDir = new Path(destination);
    try {
        String sourceGroup = null;
        final FsPermission stagingPermission;
        if (conversionEntity.getTable().getDataLocation() == null) {
            stagingPermission = FsPermission.getDefault();
        } else {
            FileStatus sourceStatus = fs.getFileStatus(conversionEntity.getTable().getDataLocation());
            stagingPermission = sourceStatus.getPermission();
            sourceGroup = sourceStatus.getGroup();
        }

        if (!fs.mkdirs(stagingDir, stagingPermission)) {
            throw new RuntimeException(
                    String.format("Failed to create path %s with permissions %s", stagingDir, stagingPermission));
        }

        // mkdirs alone is not relied upon for the final permission, so it is set explicitly
        fs.setPermission(stagingDir, stagingPermission);

        // Set the same group as source, unless that has been disabled for this work unit
        if (sourceGroup != null) {
            boolean skipSetGroup = workUnit.getPropAsBoolean(HIVE_DATASET_DESTINATION_SKIP_SETGROUP,
                    DEFAULT_HIVE_DATASET_DESTINATION_SKIP_SETGROUP);
            if (!skipSetGroup) {
                fs.setOwner(stagingDir, null, sourceGroup);
            }
        }
        log.info(String.format("Created %s with permissions %s and group %s", stagingDir, stagingPermission,
                sourceGroup));
    } catch (IOException e) {
        Throwables.propagate(e);
    }
}

From source file:org.apache.gobblin.util.AvroUtils.java

License:Apache License

/**
 * Write a schema to a file
 * @param schema the schema
 * @param filePath the target file
 * @param tempFilePath if not null then this path is used for a temporary file used to stage the write
 * @param fs a {@link FileSystem}
 * @param overwrite should any existing target file be overwritten?
 * @param perm permissions
 * @throws IOException
 */
public static void writeSchemaToFile(Schema schema, Path filePath, Path tempFilePath, FileSystem fs,
        boolean overwrite, FsPermission perm) throws IOException {
    // Tracks whether the target still exists at the time the schema gets written.
    boolean targetPresent = fs.exists(filePath);

    if (overwrite) {
        // Without a staging file the existing target has to be removed up front.
        if (targetPresent && tempFilePath == null) {
            HadoopUtils.deletePath(fs, filePath, true);
            targetPresent = false;
        }
    } else {
        Preconditions.checkState(!targetPresent, filePath + " already exists");
    }

    // While the target still exists, write to the staging file so that the replacement is as
    // close to atomic as possible; otherwise write the target directly.
    final Path outputPath;
    if (targetPresent) {
        outputPath = tempFilePath;
    } else {
        outputPath = filePath;
    }

    try (DataOutputStream schemaOut = fs.create(outputPath)) {
        schemaOut.writeChars(schema.toString());
    }
    fs.setPermission(outputPath, perm);

    if (!targetPresent) {
        // The schema was written straight to its final location; nothing to swap.
        return;
    }

    // Replace the existing file with the staged file.
    if (!fs.delete(filePath, true)) {
        throw new IOException(String.format("Failed to delete %s while renaming %s to %s", filePath,
                tempFilePath, filePath));
    }

    HadoopUtils.movePath(fs, tempFilePath, fs, filePath, true, fs.getConf());
}

From source file:org.apache.gobblin.util.WriterUtils.java

License:Apache License

/**
 * Creates {@code path} with permission {@code perm}, first creating any missing ancestor
 * directories with the same permission, and optionally waits until the new directory becomes
 * visible.
 *
 * <p>Nothing is done when {@code path} already exists. Unless {@code retrierConfig} is the
 * {@code NO_RETRY_CONFIG} sentinel (compared by reference), a retryer built from it polls
 * {@code fs.exists(path)} after creation. Finally the permission is re-applied if the created
 * directory does not carry {@code perm}.
 *
 * @param fs filesystem on which the directory is created
 * @param path directory to create
 * @param perm permission for the directory and for every ancestor created along the way
 * @param retrierConfig retry configuration for the visibility check, or {@code NO_RETRY_CONFIG}
 *        to skip that check
 * @throws IOException if the directory cannot be created, or if it is still not visible once
 *         the retryer gives up (the retryer's failure is attached as the cause)
 */
public static void mkdirsWithRecursivePermissionWithRetry(final FileSystem fs, final Path path,
        FsPermission perm, Config retrierConfig) throws IOException {

    if (fs.exists(path)) {
        return;
    }

    // Create missing ancestors one level at a time so that each of them receives perm as well.
    if (path.getParent() != null && !fs.exists(path.getParent())) {
        mkdirsWithRecursivePermissionWithRetry(fs, path.getParent(), perm, retrierConfig);
    }

    if (!fs.mkdirs(path, perm)) {
        throw new IOException(String.format("Unable to mkdir %s with permission %s", path, perm));
    }

    if (retrierConfig != NO_RETRY_CONFIG) {
        // Wait until the directory is visible: on eventually consistent filesystems such as
        // Amazon S3 it may not exist right after mkdirs() returns.
        Retryer<Void> retryer = RetryerFactory.newInstance(retrierConfig);

        try {
            retryer.call(() -> {
                if (!fs.exists(path)) {
                    throw new IOException(
                            "Path " + path + " does not exist however it should. Will wait more.");
                }
                return null;
            });
        } catch (Exception e) {
            // Chain the failure as the cause instead of flattening it into the message, so the
            // stack trace of the underlying problem is preserved.
            throw new IOException("Path " + path + " does not exist however it should. Giving up...", e);
        }
    }

    // Double check permission, since fs.mkdirs() may not guarantee to set the permission correctly
    if (!fs.getFileStatus(path).getPermission().equals(perm)) {
        fs.setPermission(path, perm);
    }
}

From source file:org.apache.hama.bsp.BSPJobClient.java

License:Apache License

/**
 * Prepares the submission directory of a BSP job and launches the job.
 *
 * <p>The number of tasks is capped by the free task capacity reported by the cluster status
 * (when one is available). Input splits, the job jar and the job XML are written below a
 * freshly created {@code submit_*} directory under the system directory before the job is
 * handed to {@code launchJob}.
 *
 * @param pJob the job to submit; it is modified in place (id, task count, jar, user, ...)
 * @param jobId the id to assign to the job
 * @return the handle returned by {@code launchJob}
 * @throws IOException if a filesystem operation fails or the number of splits exceeds the
 *         number of tasks that may run
 */
public RunningJob submitJobInternal(BSPJob pJob, BSPJobID jobId) throws IOException {
    BSPJob job = pJob;
    job.setJobID(jobId);

    int maxTasks;
    final int configured = job.getConfiguration().getInt(Constants.MAX_TASKS_PER_JOB, job.getNumBspTask());

    // Re-adjust the maxTasks based on cluster status.
    final ClusterStatus clusterStatus = getClusterStatus(true);
    if (clusterStatus == null) {
        maxTasks = configured;
    } else {
        maxTasks = clusterStatus.getMaxTasks() - clusterStatus.getTasks();

        if (configured > maxTasks) {
            LOG.warn("The configured number of tasks has exceeded the maximum allowed. Job will run with "
                    + (maxTasks) + " tasks.");
            job.setNumBspTask(maxTasks);
        }
    }

    // The file paths are derived from the directory path as first constructed, i.e. before it
    // is reduced to its plain path component further down.
    Path jobDir = new Path(getSystemDir(), "submit_" + Integer.toString(Math.abs(r.nextInt()), 36));
    final Path splitFile = new Path(jobDir, "job.split");
    final Path jarFile = new Path(jobDir, "job.jar");
    final Path jobXmlFile = new Path(jobDir, "job.xml");
    LOG.debug("BSPJobClient.submitJobDir: " + jobDir);

    // Create a number of filenames in the BSPMaster's fs namespace
    final FileSystem fs = getFs();
    fs.delete(jobDir, true);
    jobDir = new Path(fs.makeQualified(jobDir).toUri().getPath());
    final FsPermission bspSysPerms = new FsPermission(JOB_DIR_PERMISSION);
    FileSystem.mkdirs(fs, jobDir, bspSysPerms);
    fs.mkdirs(jobDir);
    final short replication = (short) job.getInt("bsp.submit.replication", 10);

    // only create the splits if we have an input
    final boolean hasInput = job.get(Constants.JOB_INPUT_DIR) != null || job.get("bsp.join.expr") != null;
    if (hasInput) {
        // Create the splits for the job
        LOG.debug("Creating splits at " + fs.makeQualified(splitFile));

        InputSplit[] splits = job.getInputFormat().getSplits(job, Math.min(maxTasks, configured));

        if (job.getConfiguration().getBoolean(Constants.ENABLE_RUNTIME_PARTITIONING, false)) {
            LOG.info("Run pre-partitioning job");
            job = partition(job, splits, maxTasks);
            maxTasks = job.getInt("hama.partition.count", maxTasks);
        }

        if (job.getBoolean("input.has.partitioned", false)) {
            splits = job.getInputFormat().getSplits(job, maxTasks);
        }

        if (splits.length > maxTasks) {
            throw new IOException(
                    "Job failed! The number of splits has exceeded the number of max tasks. The number of splits: "
                            + splits.length + ", The number of max tasks: " + maxTasks);
        }

        final int numOfSplits = writeSplits(job, splits, splitFile, maxTasks);
        final boolean forceTaskCount = job.getConfiguration().getBoolean(Constants.FORCE_SET_BSP_TASKS, false);
        if (numOfSplits > configured || !forceTaskCount) {
            job.setNumBspTask(numOfSplits);
        }

        job.set("bsp.job.split.file", splitFile.toString());
    }

    final String originalJarPath = job.getJar();

    if (originalJarPath == null) {
        LOG.warn("No job jar file set.  User classes may not be found. "
                + "See BSPJob#setJar(String) or check Your jar file.");
    } else { // copy jar to BSPMaster's fs
        final Path localJar = new Path(originalJarPath);
        // use jar name if job is not named.
        if ("".equals(job.getJobName())) {
            job.setJobName(localJar.getName());
        }
        job.setJar(jarFile.toString());
        fs.copyFromLocalFile(localJar, jarFile);

        fs.setReplication(jarFile, replication);
        fs.setPermission(jarFile, new FsPermission(JOB_FILE_PERMISSION));
    }

    // Set the user's name and working directory
    job.setUser(getUnixUserName());
    job.set("group.name", getUnixUserGroupName(job.getUser()));
    if (job.getWorkingDirectory() == null) {
        job.setWorkingDirectory(fs.getWorkingDirectory());
    }

    // Write job file to BSPMaster's fs
    final FSDataOutputStream jobXmlOut = FileSystem.create(fs, jobXmlFile, new FsPermission(JOB_FILE_PERMISSION));
    try {
        job.writeXml(jobXmlOut);
    } finally {
        jobXmlOut.close();
    }

    return launchJob(jobId, job, jobXmlFile, fs);
}

From source file:org.apache.hcatalog.cli.HCatDriver.java

License:Apache License

/**
 * Applies the group and/or permissions configured for the session to the directory of the
 * table or database named in the session configuration.
 *
 * <p>The table name is read from {@code HCatConstants.HCAT_CREATE_TBL_NAME}, falling back to
 * {@code import.destination.table} (which is cleared once read); the database name is read
 * from {@code HCatConstants.HCAT_CREATE_DB_NAME}. If neither name is set, or neither a group
 * nor permissions were configured, nothing is done. The default database is never modified.
 * When applying the settings fails, the table or database is dropped again and the failure is
 * reported on the session's error stream.
 *
 * @param ss session providing the configuration and the error stream
 * @return 0 if nothing had to be done or the settings were applied; 1 if applying them failed
 */
private int setFSPermsNGrp(SessionState ss) {

    Configuration conf = ss.getConf();

    String tblName = conf.get(HCatConstants.HCAT_CREATE_TBL_NAME, "");
    if (tblName.isEmpty()) {
        tblName = conf.get("import.destination.table", "");
        conf.set("import.destination.table", "");
    }
    String dbName = conf.get(HCatConstants.HCAT_CREATE_DB_NAME, "");
    String grp = conf.get(HCatConstants.HCAT_GROUP, null);
    String permsStr = conf.get(HCatConstants.HCAT_PERMS, null);

    if (tblName.isEmpty() && dbName.isEmpty()) {
        // it wasn't create db/table
        return 0;
    }

    if (null == grp && null == permsStr) {
        // there were no grp and perms to begin with.
        return 0;
    }

    // permsStr may be null here when only a group was configured; the null checks on perms
    // below rely on valueOf yielding null in that case (presumably it does - TODO confirm).
    FsPermission perms = FsPermission.valueOf(permsStr);

    if (!tblName.isEmpty()) {
        Hive db = null;
        try {
            db = Hive.get();
            Table tbl = db.getTable(tblName);
            Path tblPath = tbl.getPath();

            FileSystem fs = tblPath.getFileSystem(conf);
            if (null != perms) {
                fs.setPermission(tblPath, perms);
            }
            if (null != grp) {
                fs.setOwner(tblPath, null, grp);
            }
            return 0;

        } catch (Exception e) {
            ss.err.println(String.format("Failed to set permissions/groups on TABLE: <%s> %s", tblName,
                    e.getMessage()));
            try { // We need to drop the table.
                if (null != db) {
                    db.dropTable(tblName);
                }
            } catch (HiveException he) {
                // Report why the drop failed (he), not the earlier permission failure (e),
                // which has already been printed above.
                ss.err.println(String.format(
                        "Failed to drop TABLE <%s> after failing to set permissions/groups on it. %s", tblName,
                        he.getMessage()));
            }
            return 1;
        }
    } else {
        // looks like a db operation
        if (dbName.isEmpty() || dbName.equals(MetaStoreUtils.DEFAULT_DATABASE_NAME)) {
            // We dont set perms or groups for default dir.
            return 0;
        } else {
            try {
                Hive db = Hive.get();
                Path dbPath = new Warehouse(conf).getDatabasePath(db.getDatabase(dbName));
                FileSystem fs = dbPath.getFileSystem(conf);
                if (perms != null) {
                    fs.setPermission(dbPath, perms);
                }
                if (null != grp) {
                    fs.setOwner(dbPath, null, grp);
                }
                return 0;
            } catch (Exception e) {
                ss.err.println(String.format("Failed to set permissions and/or group on DB: <%s> %s", dbName,
                        e.getMessage()));
                try {
                    Hive.get().dropDatabase(dbName);
                } catch (Exception e1) {
                    ss.err.println(String.format(
                            "Failed to drop DB <%s> after failing to set permissions/group on it. %s", dbName,
                            e1.getMessage()));
                }
                return 1;
            }
        }
    }
}

From source file:org.apache.hcatalog.HcatTestUtils.java

License:Apache License

/**
 * Removes all databases and tables from the metastore.
 *
 * <p>Every database except {@code default} gets {@code defaultPerm} applied to its directory
 * (best effort) and is then dropped; afterwards all tables of the {@code default} database are
 * dropped.
 */
public static void cleanupHMS(Hive hive, Warehouse wh, FsPermission defaultPerm)
        throws HiveException, MetaException, NoSuchObjectException {
    for (String database : hive.getAllDatabases()) {
        if (!database.equals("default")) {
            try {
                Path dbLocation = getDbPath(hive, wh, database);
                FileSystem warehouseFs = dbLocation.getFileSystem(hive.getConf());
                warehouseFs.setPermission(dbLocation, defaultPerm);
            } catch (IOException ex) {
                // ignored on purpose: failing to reset the permission must not stop the cleanup
            }
            hive.dropDatabase(database, true, true, true);
        }
    }

    // clean tables in default db
    for (String table : hive.getAllTables("default")) {
        hive.dropTable("default", table, true, true);
    }
}

From source file:org.apache.hcatalog.mapreduce.FileOutputCommitterContainer.java

License:Apache License

/**
 * Sets {@code permission} on {@code dir} and, when {@code recursive} is true, on everything
 * below it.
 *
 * <p>{@code group} is only passed along to the recursive calls; this method does not apply it
 * itself.
 *
 * @throws IOException if a filesystem call fails
 */
private void applyGroupAndPerms(FileSystem fs, Path dir, FsPermission permission, String group,
        boolean recursive) throws IOException {
    fs.setPermission(dir, permission);
    if (!recursive) {
        return;
    }
    for (FileStatus child : fs.listStatus(dir)) {
        if (!child.isDir()) {
            // plain file: just set the permission
            fs.setPermission(child.getPath(), permission);
            continue;
        }
        // sub-directory: descend into it
        applyGroupAndPerms(fs, child.getPath(), permission, group, true);
    }
}

From source file:org.apache.hcatalog.mapreduce.TestHCatMultiOutputFormat.java

License:Apache License

/**
 * Creates the table {@code tableName} in the test database through the metastore client and
 * then sets {@code tablePerm} on the table's directory under the warehouse directory.
 *
 * @param tableName name of the table; also used to look up its columns
 * @param tablePerm permission string handed to the {@code FsPermission} constructor
 */
private static void createTable(String tableName, String tablePerm) throws Exception {
    Table table = new Table();
    table.setDbName(DATABASE);
    table.setTableName(tableName);

    // storage descriptor: columns plus RCFile input/output formats
    StorageDescriptor storage = new StorageDescriptor();
    storage.setCols(ColumnHolder.colMapping.get(tableName));
    table.setSd(storage);
    storage.setParameters(new HashMap<String, String>());

    // serde: columnar serde named after the table
    storage.setSerdeInfo(new SerDeInfo());
    SerDeInfo serde = storage.getSerdeInfo();
    serde.setName(table.getTableName());
    serde.setParameters(new HashMap<String, String>());
    storage.setInputFormat(org.apache.hadoop.hive.ql.io.RCFileInputFormat.class.getName());
    storage.setOutputFormat(org.apache.hadoop.hive.ql.io.RCFileOutputFormat.class.getName());
    serde.getParameters().put(serdeConstants.SERIALIZATION_FORMAT, "1");
    serde.setSerializationLib(org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe.class.getName());
    table.setPartitionKeys(ColumnHolder.partitionCols);

    hmsc.createTable(table);

    Path tableDir = new Path(warehousedir, tableName);
    FileSystem.get(mrConf).setPermission(tableDir, new FsPermission(tablePerm));
}

From source file:org.apache.hive.hcatalog.mapreduce.FileOutputCommitterContainer.java

License:Apache License

/**
 * Sets {@code permission} on {@code dir} and, when {@code recursive} is true, on every file and
 * directory below it. The call is traced at debug level.
 *
 * <p>{@code group} is logged and passed along to the recursive calls; this method does not
 * apply it itself.
 *
 * @throws IOException if a filesystem call fails
 */
private void applyGroupAndPerms(FileSystem fs, Path dir, FsPermission permission, String group,
        boolean recursive) throws IOException {
    if (LOG.isDebugEnabled()) {
        LOG.debug("applyGroupAndPerms : " + dir + " perms: " + permission + " group: " + group + " recursive: "
                + recursive);
    }
    fs.setPermission(dir, permission);
    if (!recursive) {
        return;
    }
    final FileStatus[] children = fs.listStatus(dir);
    for (int i = 0; i < children.length; i++) {
        final FileStatus entry = children[i];
        if (entry.isDir()) {
            // sub-directory: handled by the recursive call, which also sets its permission
            applyGroupAndPerms(fs, entry.getPath(), permission, group, true);
        } else {
            fs.setPermission(entry.getPath(), permission);
        }
    }
}