Example usage for org.apache.hadoop.fs Path getParent

Introduction

This page lists example usages of org.apache.hadoop.fs.Path#getParent() collected from open-source projects.

Prototype

public Path getParent() 

Document

Returns the parent of a path or null if at root.
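
For orientation, here is a minimal sketch of what getParent() returns for a nested path and for the root. The paths are hypothetical and chosen only for illustration:

import org.apache.hadoop.fs.Path;

public class GetParentExample {
    public static void main(String[] args) {
        Path part = new Path("hdfs://namenode:8020/user/flink/data/part-0");

        // The parent keeps the scheme and authority of the child path.
        System.out.println(part.getParent());             // hdfs://namenode:8020/user/flink/data
        System.out.println(part.getParent().getParent()); // hdfs://namenode:8020/user/flink

        // The root has no parent, so getParent() returns null.
        System.out.println(new Path("/").getParent());    // null
    }
}

Many of the examples below pair getParent() with getName() to build a sibling path, e.g. new Path(p.getParent(), prefix + p.getName()).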

Usage

From source file: org.apache.flink.streaming.connectors.fs.RollingSink.java

License: Apache License

@Override
public void notifyCheckpointComplete(long checkpointId) throws Exception {
    synchronized (bucketState.pendingFilesPerCheckpoint) {
        Set<Long> pastCheckpointIds = bucketState.pendingFilesPerCheckpoint.keySet();
        Set<Long> checkpointsToRemove = new HashSet<>();
        for (Long pastCheckpointId : pastCheckpointIds) {
            if (pastCheckpointId <= checkpointId) {
                LOG.debug("Moving pending files to final location for checkpoint {}", pastCheckpointId);
                // All the pending files are buckets that have been completed but are waiting to be renamed
                // to their final name
                for (String filename : bucketState.pendingFilesPerCheckpoint.get(pastCheckpointId)) {
                    Path finalPath = new Path(filename);
                    Path pendingPath = new Path(finalPath.getParent(), pendingPrefix + finalPath.getName())
                            .suffix(pendingSuffix);

                    FileSystem fs = pendingPath.getFileSystem(new org.apache.hadoop.conf.Configuration());
                    fs.rename(pendingPath, finalPath);
                    LOG.debug("Moving pending file {} to final location after complete checkpoint {}.",
                            pendingPath, pastCheckpointId);
                }
                checkpointsToRemove.add(pastCheckpointId);
            }
        }
        for (Long toRemove : checkpointsToRemove) {
            bucketState.pendingFilesPerCheckpoint.remove(toRemove);
        }
    }
}
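
The snippet above derives each pending file as a sibling of its final path. As a rough sketch of what that construction yields, assuming for illustration a pending prefix of "_" and a pending suffix of ".pending" (the sink's actual values come from its configuration):

import org.apache.hadoop.fs.Path;

public class PendingPathSketch {
    public static void main(String[] args) {
        Path finalPath = new Path("hdfs://namenode:8020/output/part-2-17");

        // Same parent directory, prefixed file name, suffix appended to the final name component.
        Path pendingPath = new Path(finalPath.getParent(), "_" + finalPath.getName()).suffix(".pending");

        System.out.println(pendingPath); // hdfs://namenode:8020/output/_part-2-17.pending
    }
}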

From source file: org.apache.flink.streaming.connectors.fs.RollingSink.java

License: Apache License

@Override
public void restoreState(BucketState state) {
    bucketState = state;
    // we can clean all the pending files since they were renamed to final files
    // after this checkpoint was successful
    bucketState.pendingFiles.clear();
    FileSystem fs = null;
    try {
        fs = new Path(basePath).getFileSystem(new org.apache.hadoop.conf.Configuration());
    } catch (IOException e) {
        LOG.error("Error while creating FileSystem in checkpoint restore.", e);
        throw new RuntimeException("Error while creating FileSystem in checkpoint restore.", e);
    }
    if (bucketState.currentFile != null) {
        // We were writing to a file when the last checkpoint occurred. This file can either
        // still be in-progress or have become a pending file at some point after the checkpoint.
        // Either way, we have to truncate it back to a valid state (or write a .valid-length
        // file that specifies up to which length it is valid) and rename it to the final name
        // before starting a new bucket file.
        Path partPath = new Path(bucketState.currentFile);
        try {
            Path partPendingPath = new Path(partPath.getParent(), pendingPrefix + partPath.getName())
                    .suffix(pendingSuffix);
            Path partInProgressPath = new Path(partPath.getParent(), inProgressPrefix + partPath.getName())
                    .suffix(inProgressSuffix);

            if (fs.exists(partPendingPath)) {
                LOG.debug(
                        "In-progress file {} has been moved to pending after checkpoint, moving to final location.",
                        partPath);
                // has been moved to pending in the meantime, rename to final location
                fs.rename(partPendingPath, partPath);
            } else if (fs.exists(partInProgressPath)) {
                LOG.debug("In-progress file {} is still in-progress, moving to final location.", partPath);
                // it was still in progress, rename to final path
                fs.rename(partInProgressPath, partPath);
            } else if (fs.exists(partPath)) {
                LOG.debug("In-Progress file {} was already moved to final location {}.",
                        bucketState.currentFile, partPath);
            } else {
                LOG.debug(
                        "In-Progress file {} was neither moved to pending nor is still in progress. Possibly, "
                                + "it was moved to final location by a previous snapshot restore",
                        bucketState.currentFile);
            }

            refTruncate = reflectTruncate(fs);
            // truncate it or write a ".valid-length" file to specify up to which point it is valid
            if (refTruncate != null) {
                LOG.debug("Truncating {} to valid length {}", partPath, bucketState.currentFileValidLength);
                // someone else might still hold the lease from a previous try; we are
                // recovering, after all ...
                if (fs instanceof DistributedFileSystem) {
                    DistributedFileSystem dfs = (DistributedFileSystem) fs;
                    LOG.debug("Trying to recover file lease {}", partPath);
                    dfs.recoverLease(partPath);
                    boolean isclosed = dfs.isFileClosed(partPath);
                    StopWatch sw = new StopWatch();
                    sw.start();
                    while (!isclosed) {
                        if (sw.getTime() > asyncTimeout) {
                            break;
                        }
                        try {
                            Thread.sleep(500);
                        } catch (InterruptedException e1) {
                            // ignore it
                        }
                        isclosed = dfs.isFileClosed(partPath);
                    }
                }
                Boolean truncated = (Boolean) refTruncate.invoke(fs, partPath,
                        bucketState.currentFileValidLength);
                if (!truncated) {
                    LOG.debug("Truncate did not immediately complete for {}, waiting...", partPath);

                    // we must wait for the asynchronous truncate operation to complete
                    StopWatch sw = new StopWatch();
                    sw.start();
                    long newLen = fs.getFileStatus(partPath).getLen();
                    while (newLen != bucketState.currentFileValidLength) {
                        if (sw.getTime() > asyncTimeout) {
                            break;
                        }
                        try {
                            Thread.sleep(500);
                        } catch (InterruptedException e1) {
                            // ignore it
                        }
                        newLen = fs.getFileStatus(partPath).getLen();
                    }
                    if (newLen != bucketState.currentFileValidLength) {
                        throw new RuntimeException("Truncate did not truncate to right length. Should be "
                                + bucketState.currentFileValidLength + " is " + newLen + ".");
                    }
                }

            } else {
                LOG.debug("Writing valid-length file for {} to specify valid length {}", partPath,
                        bucketState.currentFileValidLength);
                Path validLengthFilePath = new Path(partPath.getParent(),
                        validLengthPrefix + partPath.getName()).suffix(validLengthSuffix);
                if (!fs.exists(validLengthFilePath)) {
                    FSDataOutputStream lengthFileOut = fs.create(validLengthFilePath);
                    lengthFileOut.writeUTF(Long.toString(bucketState.currentFileValidLength));
                    lengthFileOut.close();
                }
            }

            // invalidate in the state object
            bucketState.currentFile = null;
            bucketState.currentFileValidLength = -1;
        } catch (IOException e) {
            LOG.error("Error while restoring RollingSink state.", e);
            throw new RuntimeException("Error while restoring RollingSink state.", e);
        } catch (InvocationTargetException | IllegalAccessException e) {
            LOG.error("Cound not invoke truncate.", e);
            throw new RuntimeException("Could not invoke truncate.", e);
        }
    }

    LOG.debug("Clearing pending/in-progress files.");

    // Move files that are confirmed by a checkpoint but did not get moved to final location
    // because the checkpoint notification did not happen before a failure

    Set<Long> pastCheckpointIds = bucketState.pendingFilesPerCheckpoint.keySet();
    LOG.debug("Moving pending files to final location on restore.");
    for (Long pastCheckpointId : pastCheckpointIds) {
        // All the pending files are buckets that have been completed but are waiting to be renamed
        // to their final name
        for (String filename : bucketState.pendingFilesPerCheckpoint.get(pastCheckpointId)) {
            Path finalPath = new Path(filename);
            Path pendingPath = new Path(finalPath.getParent(), pendingPrefix + finalPath.getName())
                    .suffix(pendingSuffix);

            try {
                if (fs.exists(pendingPath)) {
                    LOG.debug(
                            "(RESTORE) Moving pending file {} to final location after complete checkpoint {}.",
                            pendingPath, pastCheckpointId);
                    fs.rename(pendingPath, finalPath);
                }
            } catch (IOException e) {
                LOG.error("(RESTORE) Error while renaming pending file {} to final path {}: {}", pendingPath,
                        finalPath, e);
                throw new RuntimeException(
                        "Error while renaming pending file " + pendingPath + " to final path " + finalPath, e);
            }
        }
    }
    bucketState.pendingFiles.clear();
    synchronized (bucketState.pendingFilesPerCheckpoint) {
        bucketState.pendingFilesPerCheckpoint.clear();
    }

    // we need to get this here since open() has not yet been called
    int subtaskIndex = getRuntimeContext().getIndexOfThisSubtask();
    // delete pending files
    try {

        RemoteIterator<LocatedFileStatus> bucketFiles = fs.listFiles(new Path(basePath), true);

        while (bucketFiles.hasNext()) {
            LocatedFileStatus file = bucketFiles.next();
            if (file.getPath().toString().endsWith(pendingSuffix)) {
                // only delete files that contain our subtask index
                if (file.getPath().toString().contains(partPrefix + "-" + subtaskIndex + "-")) {
                    LOG.debug("(RESTORE) Deleting pending file {}", file.getPath().toString());
                    fs.delete(file.getPath(), true);
                }
            }
            if (file.getPath().toString().endsWith(inProgressSuffix)) {
                // only delete files that contain our subtask index
                if (file.getPath().toString().contains(partPrefix + "-" + subtaskIndex + "-")) {
                    LOG.debug("(RESTORE) Deleting in-progress file {}", file.getPath().toString());
                    fs.delete(file.getPath(), true);
                }
            }
        }
    } catch (IOException e) {
        LOG.error("Error while deleting old pending files: {}", e);
        throw new RuntimeException("Error while deleting old pending files.", e);
    }
}

From source file: org.apache.flink.yarn.AbstractYarnClusterDescriptor.java

License: Apache License

public ApplicationReport startAppMaster(JobGraph jobGraph, YarnClient yarnClient,
        YarnClientApplication yarnApplication) throws Exception {

    // ------------------ Set default file system scheme -------------------------

    try {
        org.apache.flink.core.fs.FileSystem.setDefaultScheme(flinkConfiguration);
    } catch (IOException e) {
        throw new IOException("Error while setting the default " + "filesystem scheme from configuration.", e);
    }

    // initialize file system
    // Copy the application master jar to the filesystem
    // Create a local resource to point to the destination jar path
    final FileSystem fs = FileSystem.get(conf);

    // hard-coded check for the GoogleHDFS client because it does not override the getScheme() method.
    if (!fs.getClass().getSimpleName().equals("GoogleHadoopFileSystem") && fs.getScheme().startsWith("file")) {
        LOG.warn("The file system scheme is '" + fs.getScheme() + "'. This indicates that the "
                + "specified Hadoop configuration path is wrong and the system is using the default Hadoop configuration values."
                + "The Flink YARN client needs to store its files in a distributed file system");
    }

    ApplicationSubmissionContext appContext = yarnApplication.getApplicationSubmissionContext();
    Set<File> effectiveShipFiles = new HashSet<>(shipFiles.size());
    for (File file : shipFiles) {
        effectiveShipFiles.add(file.getAbsoluteFile());
    }

    //check if there is a logback or log4j file
    File logbackFile = new File(configurationDirectory + File.separator + CONFIG_FILE_LOGBACK_NAME);
    final boolean hasLogback = logbackFile.exists();
    if (hasLogback) {
        effectiveShipFiles.add(logbackFile);
    }

    File log4jFile = new File(configurationDirectory + File.separator + CONFIG_FILE_LOG4J_NAME);
    final boolean hasLog4j = log4jFile.exists();
    if (hasLog4j) {
        effectiveShipFiles.add(log4jFile);
        if (hasLogback) {
            // this means there is already a logback configuration file --> fail
            LOG.warn("The configuration directory ('" + configurationDirectory + "') contains both LOG4J and "
                    + "Logback configuration files. Please delete or rename one of them.");
        }
    }

    addLibFolderToShipFiles(effectiveShipFiles);

    // add the user jar to the classpath of the to-be-created cluster
    if (userJarFiles != null) {
        effectiveShipFiles.addAll(userJarFiles);
    }

    // Set-up ApplicationSubmissionContext for the application

    final ApplicationId appId = appContext.getApplicationId();

    // ------------------ Add Zookeeper namespace to local flinkConfiguraton ------
    String zkNamespace = getZookeeperNamespace();
    // no user specified cli argument for namespace?
    if (zkNamespace == null || zkNamespace.isEmpty()) {
        // namespace defined in config? else use applicationId as default.
        zkNamespace = flinkConfiguration.getString(HighAvailabilityOptions.HA_CLUSTER_ID,
                String.valueOf(appId));
        setZookeeperNamespace(zkNamespace);
    }

    flinkConfiguration.setString(HighAvailabilityOptions.HA_CLUSTER_ID, zkNamespace);

    if (HighAvailabilityMode.isHighAvailabilityModeActivated(flinkConfiguration)) {
        // activate re-execution of failed applications
        appContext.setMaxAppAttempts(flinkConfiguration.getInteger(ConfigConstants.YARN_APPLICATION_ATTEMPTS,
                YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS));

        activateHighAvailabilitySupport(appContext);
    } else {
        // set number of application retries to 1 in the default case
        appContext
                .setMaxAppAttempts(flinkConfiguration.getInteger(ConfigConstants.YARN_APPLICATION_ATTEMPTS, 1));
    }

    // local resource map for Yarn
    final Map<String, LocalResource> localResources = new HashMap<>(2 + effectiveShipFiles.size());
    // list of remote paths (after upload)
    final List<Path> paths = new ArrayList<>(2 + effectiveShipFiles.size());
    // classpath assembler
    final StringBuilder classPathBuilder = new StringBuilder();
    // ship list that enables reuse of resources for task manager containers
    StringBuilder envShipFileList = new StringBuilder();

    // upload and register ship files
    for (File shipFile : effectiveShipFiles) {
        LocalResource shipResources = Records.newRecord(LocalResource.class);

        Path shipLocalPath = new Path("file://" + shipFile.getAbsolutePath());
        Path remotePath = Utils.setupLocalResource(fs, appId.toString(), shipLocalPath, shipResources,
                fs.getHomeDirectory());

        paths.add(remotePath);

        localResources.put(shipFile.getName(), shipResources);

        if (shipFile.isDirectory()) {
            // add directories to the classpath
            java.nio.file.Path shipPath = shipFile.toPath();
            final java.nio.file.Path parentPath = shipPath.getParent();

            Files.walkFileTree(shipPath, new SimpleFileVisitor<java.nio.file.Path>() {
                @Override
                public FileVisitResult preVisitDirectory(java.nio.file.Path dir, BasicFileAttributes attrs)
                        throws IOException {
                    super.preVisitDirectory(dir, attrs);

                    java.nio.file.Path relativePath = parentPath.relativize(dir);

                    classPathBuilder.append(relativePath).append(File.separator).append("*")
                            .append(File.pathSeparator);

                    return FileVisitResult.CONTINUE;
                }
            });
        } else {
            // add files to the classpath
            classPathBuilder.append(shipFile.getName()).append(File.pathSeparator);
        }

        envShipFileList.append(remotePath).append(",");
    }

    // Setup jar for ApplicationMaster
    LocalResource appMasterJar = Records.newRecord(LocalResource.class);
    LocalResource flinkConf = Records.newRecord(LocalResource.class);
    Path remotePathJar = Utils.setupLocalResource(fs, appId.toString(), flinkJarPath, appMasterJar,
            fs.getHomeDirectory());
    Path remotePathConf = Utils.setupLocalResource(fs, appId.toString(), flinkConfigurationPath, flinkConf,
            fs.getHomeDirectory());
    localResources.put("flink.jar", appMasterJar);
    localResources.put("flink-conf.yaml", flinkConf);

    paths.add(remotePathJar);
    classPathBuilder.append("flink.jar").append(File.pathSeparator);
    paths.add(remotePathConf);
    classPathBuilder.append("flink-conf.yaml").append(File.pathSeparator);

    // write job graph to tmp file and add it to local resource
    // TODO: the server should use the user's main method to generate the job graph
    if (jobGraph != null) {
        try {
            File fp = File.createTempFile(appId.toString(), null);
            fp.deleteOnExit();
            try (FileOutputStream output = new FileOutputStream(fp);
                    ObjectOutputStream obOutput = new ObjectOutputStream(output);) {
                obOutput.writeObject(jobGraph);
            }
            LocalResource jobgraph = Records.newRecord(LocalResource.class);
            Path remoteJobGraph = Utils.setupLocalResource(fs, appId.toString(), new Path(fp.toURI()), jobgraph,
                    fs.getHomeDirectory());
            localResources.put("job.graph", jobgraph);
            paths.add(remoteJobGraph);
            classPathBuilder.append("job.graph").append(File.pathSeparator);
        } catch (Exception e) {
            LOG.warn("Add job graph to local resource fail");
            throw e;
        }
    }

    sessionFilesDir = new Path(fs.getHomeDirectory(), ".flink/" + appId.toString() + "/");

    FsPermission permission = new FsPermission(FsAction.ALL, FsAction.NONE, FsAction.NONE);
    fs.setPermission(sessionFilesDir, permission); // set permission for path.

    //To support Yarn Secure Integration Test Scenario
    //In Integration test setup, the Yarn containers created by YarnMiniCluster does not have the Yarn site XML
    //and KRB5 configuration files. We are adding these files as container local resources for the container
    //applications (JM/TMs) to have proper secure cluster setup
    Path remoteKrb5Path = null;
    Path remoteYarnSiteXmlPath = null;
    boolean hasKrb5 = false;
    if (System.getenv("IN_TESTS") != null) {
        String krb5Config = System.getProperty("java.security.krb5.conf");
        if (krb5Config != null && krb5Config.length() != 0) {
            File krb5 = new File(krb5Config);
            LOG.info("Adding KRB5 configuration {} to the AM container local resource bucket",
                    krb5.getAbsolutePath());
            LocalResource krb5ConfResource = Records.newRecord(LocalResource.class);
            Path krb5ConfPath = new Path(krb5.getAbsolutePath());
            remoteKrb5Path = Utils.setupLocalResource(fs, appId.toString(), krb5ConfPath, krb5ConfResource,
                    fs.getHomeDirectory());
            localResources.put(Utils.KRB5_FILE_NAME, krb5ConfResource);

            File f = new File(System.getenv("YARN_CONF_DIR"), Utils.YARN_SITE_FILE_NAME);
            LOG.info("Adding Yarn configuration {} to the AM container local resource bucket",
                    f.getAbsolutePath());
            LocalResource yarnConfResource = Records.newRecord(LocalResource.class);
            Path yarnSitePath = new Path(f.getAbsolutePath());
            remoteYarnSiteXmlPath = Utils.setupLocalResource(fs, appId.toString(), yarnSitePath,
                    yarnConfResource, fs.getHomeDirectory());
            localResources.put(Utils.YARN_SITE_FILE_NAME, yarnConfResource);

            hasKrb5 = true;
        }
    }

    // setup security tokens
    LocalResource keytabResource = null;
    Path remotePathKeytab = null;
    String keytab = flinkConfiguration.getString(SecurityOptions.KERBEROS_LOGIN_KEYTAB);
    if (keytab != null) {
        LOG.info("Adding keytab {} to the AM container local resource bucket", keytab);
        keytabResource = Records.newRecord(LocalResource.class);
        Path keytabPath = new Path(keytab);
        remotePathKeytab = Utils.setupLocalResource(fs, appId.toString(), keytabPath, keytabResource,
                fs.getHomeDirectory());
        localResources.put(Utils.KEYTAB_FILE_NAME, keytabResource);
    }

    final ContainerLaunchContext amContainer = setupApplicationMasterContainer(hasLogback, hasLog4j, hasKrb5);

    if (UserGroupInformation.isSecurityEnabled() && keytab == null) {
        //set tokens only when keytab is not provided
        LOG.info("Adding delegation token to the AM container..");
        Utils.setTokensFor(amContainer, paths, conf);
    }

    amContainer.setLocalResources(localResources);
    fs.close();

    // Setup CLASSPATH and environment variables for ApplicationMaster
    final Map<String, String> appMasterEnv = new HashMap<>();
    // set user specified app master environment variables
    appMasterEnv.putAll(Utils.getEnvironmentVariables(ConfigConstants.YARN_APPLICATION_MASTER_ENV_PREFIX,
            flinkConfiguration));
    // set Flink app class path
    appMasterEnv.put(YarnConfigKeys.ENV_FLINK_CLASSPATH, classPathBuilder.toString());

    // set Flink on YARN internal configuration values
    appMasterEnv.put(YarnConfigKeys.ENV_TM_COUNT, String.valueOf(taskManagerCount));
    appMasterEnv.put(YarnConfigKeys.ENV_TM_MEMORY, String.valueOf(taskManagerMemoryMb));
    appMasterEnv.put(YarnConfigKeys.FLINK_JAR_PATH, remotePathJar.toString());
    appMasterEnv.put(YarnConfigKeys.ENV_APP_ID, appId.toString());
    appMasterEnv.put(YarnConfigKeys.ENV_CLIENT_HOME_DIR, fs.getHomeDirectory().toString());
    appMasterEnv.put(YarnConfigKeys.ENV_CLIENT_SHIP_FILES, envShipFileList.toString());
    appMasterEnv.put(YarnConfigKeys.ENV_SLOTS, String.valueOf(slots));
    appMasterEnv.put(YarnConfigKeys.ENV_DETACHED, String.valueOf(detached));
    appMasterEnv.put(YarnConfigKeys.ENV_ZOOKEEPER_NAMESPACE, getZookeeperNamespace());

    // https://github.com/apache/hadoop/blob/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/YarnApplicationSecurity.md#identity-on-an-insecure-cluster-hadoop_user_name
    appMasterEnv.put(YarnConfigKeys.ENV_HADOOP_USER_NAME, UserGroupInformation.getCurrentUser().getUserName());

    if (keytabResource != null) {
        appMasterEnv.put(YarnConfigKeys.KEYTAB_PATH, remotePathKeytab.toString());
        String principal = flinkConfiguration.getString(SecurityOptions.KERBEROS_LOGIN_PRINCIPAL);
        appMasterEnv.put(YarnConfigKeys.KEYTAB_PRINCIPAL, principal);
    }

    //To support Yarn Secure Integration Test Scenario
    if (remoteYarnSiteXmlPath != null && remoteKrb5Path != null) {
        appMasterEnv.put(YarnConfigKeys.ENV_YARN_SITE_XML_PATH, remoteYarnSiteXmlPath.toString());
        appMasterEnv.put(YarnConfigKeys.ENV_KRB5_PATH, remoteKrb5Path.toString());
    }

    if (dynamicPropertiesEncoded != null) {
        appMasterEnv.put(YarnConfigKeys.ENV_DYNAMIC_PROPERTIES, dynamicPropertiesEncoded);
    }

    // set classpath from YARN configuration
    Utils.setupYarnClassPath(conf, appMasterEnv);

    amContainer.setEnvironment(appMasterEnv);

    // Set up resource type requirements for ApplicationMaster
    Resource capability = Records.newRecord(Resource.class);
    capability.setMemory(jobManagerMemoryMb);
    capability.setVirtualCores(1);

    String name;
    if (customName == null) {
        name = "Flink session with " + taskManagerCount + " TaskManagers";
        if (detached) {
            name += " (detached)";
        }
    } else {
        name = customName;
    }

    appContext.setApplicationName(name);
    appContext.setApplicationType("Apache Flink");
    appContext.setAMContainerSpec(amContainer);
    appContext.setResource(capability);
    if (yarnQueue != null) {
        appContext.setQueue(yarnQueue);
    }

    setApplicationTags(appContext);

    // add a hook to clean up in case deployment fails
    Thread deploymentFailureHook = new DeploymentFailureHook(yarnClient, yarnApplication);
    Runtime.getRuntime().addShutdownHook(deploymentFailureHook);
    LOG.info("Submitting application master " + appId);
    yarnClient.submitApplication(appContext);

    LOG.info("Waiting for the cluster to be allocated");
    final long startTime = System.currentTimeMillis();
    ApplicationReport report;
    YarnApplicationState lastAppState = YarnApplicationState.NEW;
    loop: while (true) {
        try {
            report = yarnClient.getApplicationReport(appId);
        } catch (IOException e) {
            throw new YarnDeploymentException("Failed to deploy the cluster.", e);
        }
        YarnApplicationState appState = report.getYarnApplicationState();
        LOG.debug("Application State: {}", appState);
        switch (appState) {
        case FAILED:
        case FINISHED: //TODO: the finished state may be valid in flip-6
        case KILLED:
            throw new YarnDeploymentException("The YARN application unexpectedly switched to state " + appState
                    + " during deployment. \n" + "Diagnostics from YARN: " + report.getDiagnostics() + "\n"
                    + "If log aggregation is enabled on your cluster, use this command to further investigate the issue:\n"
                    + "yarn logs -applicationId " + appId);
            //break ..
        case RUNNING:
            LOG.info("YARN application has been deployed successfully.");
            break loop;
        default:
            if (appState != lastAppState) {
                LOG.info("Deploying cluster, current state " + appState);
            }
            if (System.currentTimeMillis() - startTime > 60000) {
                LOG.info(
                        "Deployment took more than 60 seconds. Please check if the requested resources are available in the YARN cluster");
            }

        }
        lastAppState = appState;
        Thread.sleep(250);
    }
    // print the application id so that the user can cancel it themselves.
    if (isDetachedMode()) {
        LOG.info("The Flink YARN client has been started in detached mode. In order to stop "
                + "Flink on YARN, use the following command or a YARN web interface to stop "
                + "it:\nyarn application -kill " + appId + "\nPlease also note that the "
                + "temporary files of the YARN session in the home directoy will not be removed.");
    }
    // since deployment was successful, remove the hook
    try {
        Runtime.getRuntime().removeShutdownHook(deploymentFailureHook);
    } catch (IllegalStateException e) {
        // we're already in the shut down hook.
    }
    return report;
}

From source file: org.apache.giraph.io.formats.GiraphTextOutputFormat.java

License: Apache License

@Override
public RecordWriter<Text, Text> getRecordWriter(TaskAttemptContext job)
        throws IOException, InterruptedException {
    String extension = "";
    CompressionCodec codec = null;
    Configuration conf = job.getConfiguration();
    boolean isCompressed = getCompressOutput(job);

    if (isCompressed) {
        Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(job, GzipCodec.class);
        codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, conf);
        extension = codec.getDefaultExtension();
    }
    Path file = getDefaultWorkFile(job, extension);

    /* adjust the path */
    FSDataOutputStream fileOut;
    FileSystem fs = file.getFileSystem(conf);
    String subdir = getSubdir();
    if (!subdir.isEmpty()) {
        Path subdirPath = new Path(subdir);
        Path subdirAbsPath = new Path(file.getParent(), subdirPath);
        Path vertexFile = new Path(subdirAbsPath, file.getName());
        fileOut = fs.create(vertexFile, false);
    } else {
        fileOut = fs.create(file, false);
    }

    String separator = "\t";

    if (!isCompressed) {
        return new LineRecordWriter<Text, Text>(fileOut, separator);
    } else {
        DataOutputStream out = new DataOutputStream(codec.createOutputStream(fileOut));
        return new LineRecordWriter<Text, Text>(out, separator);
    }
}
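
In the example above, getParent() places the vertex output file into a subdirectory next to the default work file. A small hypothetical sketch of the resulting layout (the work-file path and subdirectory name are made up):

import org.apache.hadoop.fs.Path;

public class SubdirPathSketch {
    public static void main(String[] args) {
        Path file = new Path("hdfs://namenode:8020/out/_temporary/attempt_1/part-m-00000");

        // Resolve the subdirectory against the work file's parent, then keep the original file name.
        Path subdirAbsPath = new Path(file.getParent(), new Path("vertices"));
        Path vertexFile = new Path(subdirAbsPath, file.getName());

        System.out.println(vertexFile); // hdfs://namenode:8020/out/_temporary/attempt_1/vertices/part-m-00000
    }
}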

From source file: org.apache.gobblin.cluster.GobblinHelixJobLauncher.java

License: Apache License

/**
 * Create a job from a given batch of {@link WorkUnit}s.
 */
private JobConfig.Builder createJob(List<WorkUnit> workUnits) throws IOException {
    Map<String, TaskConfig> taskConfigMap = Maps.newHashMap();

    try (ParallelRunner stateSerDeRunner = new ParallelRunner(this.stateSerDeRunnerThreads, this.fs)) {
        int multiTaskIdSequence = 0;
        for (WorkUnit workUnit : workUnits) {
            if (workUnit instanceof MultiWorkUnit) {
                workUnit.setId(
                        JobLauncherUtils.newMultiTaskId(this.jobContext.getJobId(), multiTaskIdSequence++));
            }
            addWorkUnit(workUnit, stateSerDeRunner, taskConfigMap);
        }

        Path jobStateFilePath;

        // write the job.state using the state store if present, otherwise serialize directly to the file
        if (this.stateStores.haveJobStateStore()) {
            jobStateFilePath = GobblinClusterUtils.getJobStateFilePath(true, this.appWorkDir,
                    this.jobContext.getJobId());
            this.stateStores.getJobStateStore().put(jobStateFilePath.getParent().getName(),
                    jobStateFilePath.getName(), this.jobContext.getJobState());
        } else {
            jobStateFilePath = GobblinClusterUtils.getJobStateFilePath(false, this.appWorkDir,
                    this.jobContext.getJobId());
            SerializationUtils.serializeState(this.fs, jobStateFilePath, this.jobContext.getJobState());
        }

        LOGGER.debug("GobblinHelixJobLauncher.createJob: jobStateFilePath {}, jobState {} jobProperties {}",
                jobStateFilePath, this.jobContext.getJobState().toString(),
                this.jobContext.getJobState().getProperties());
    }

    JobConfig.Builder jobConfigBuilder = new JobConfig.Builder();

    // Helix task attempts = retries + 1 (fallback to general task retry for backward compatibility)
    jobConfigBuilder.setMaxAttemptsPerTask(this.jobContext.getJobState().getPropAsInt(
            GobblinClusterConfigurationKeys.HELIX_TASK_MAX_ATTEMPTS_KEY,
            this.jobContext.getJobState().getPropAsInt(ConfigurationKeys.MAX_TASK_RETRIES_KEY,
                    ConfigurationKeys.DEFAULT_MAX_TASK_RETRIES))
            + 1);

    // Helix task timeout (fallback to general task timeout for backward compatibility)
    jobConfigBuilder.setTimeoutPerTask(this.jobContext.getJobState().getPropAsLong(
            GobblinClusterConfigurationKeys.HELIX_TASK_TIMEOUT_SECONDS,
            this.jobContext.getJobState().getPropAsLong(ConfigurationKeys.TASK_TIMEOUT_SECONDS,
                    ConfigurationKeys.DEFAULT_TASK_TIMEOUT_SECONDS))
            * 1000);

    jobConfigBuilder.setFailureThreshold(workUnits.size());
    jobConfigBuilder.addTaskConfigMap(taskConfigMap).setCommand(GobblinTaskRunner.GOBBLIN_TASK_FACTORY_NAME);
    jobConfigBuilder.setNumConcurrentTasksPerInstance(
            ConfigUtils.getInt(jobConfig, GobblinClusterConfigurationKeys.HELIX_CLUSTER_TASK_CONCURRENCY,
                    GobblinClusterConfigurationKeys.HELIX_CLUSTER_TASK_CONCURRENCY_DEFAULT));

    if (this.jobConfig.hasPath(GobblinClusterConfigurationKeys.HELIX_JOB_TAG_KEY)) {
        String jobTag = this.jobConfig.getString(GobblinClusterConfigurationKeys.HELIX_JOB_TAG_KEY);
        log.info("Job {} has tags associated : {}", this.jobContext.getJobId(), jobTag);
        jobConfigBuilder.setInstanceGroupTag(jobTag);
    }

    if (this.jobConfig.hasPath(GobblinClusterConfigurationKeys.HELIX_JOB_TYPE_KEY)) {
        String jobType = this.jobConfig.getString(GobblinClusterConfigurationKeys.HELIX_JOB_TYPE_KEY);
        log.info("Job {} has types associated : {}", this.jobContext.getJobId(), jobType);
        jobConfigBuilder.setJobType(jobType);
    }

    if (Task.getExecutionModel(ConfigUtils.configToState(jobConfig)).equals(ExecutionModel.STREAMING)) {
        jobConfigBuilder.setRebalanceRunningTask(true);
    }

    jobConfigBuilder.setExpiry(this.jobContext.getJobState().getPropAsLong(
            GobblinClusterConfigurationKeys.HELIX_WORKFLOW_EXPIRY_TIME_SECONDS,
            GobblinClusterConfigurationKeys.DEFAULT_HELIX_WORKFLOW_EXPIRY_TIME_SECONDS));

    return jobConfigBuilder;
}

From source file: org.apache.gobblin.cluster.GobblinHelixJobLauncher.java

License: Apache License

/**
 * Persist a single {@link WorkUnit} (flattened) to a file.
 */
private String persistWorkUnit(final Path workUnitFileDir, final WorkUnit workUnit,
        ParallelRunner stateSerDeRunner) throws IOException {
    final StateStore stateStore;
    String workUnitFileName = workUnit.getId();

    if (workUnit instanceof MultiWorkUnit) {
        workUnitFileName += MULTI_WORK_UNIT_FILE_EXTENSION;
        stateStore = stateStores.getMwuStateStore();
    } else {
        workUnitFileName += WORK_UNIT_FILE_EXTENSION;
        stateStore = stateStores.getWuStateStore();
    }

    Path workUnitFile = new Path(workUnitFileDir, workUnitFileName);
    final String fileName = workUnitFile.getName();
    final String storeName = workUnitFile.getParent().getName();
    stateSerDeRunner.submitCallable(new Callable<Void>() {
        @Override
        public Void call() throws Exception {
            stateStore.put(storeName, fileName, workUnit);
            return null;
        }
    }, "Serialize state to store " + storeName + " file " + fileName);

    return workUnitFile.toString();
}

From source file: org.apache.gobblin.compaction.action.CompactionCompleteFileOperationAction.java

License: Apache License

/**
 * Replace or append the destination folder with new files from the map-reduce job.
 * Create a record count file containing the number of records that have been processed.
 */
public void onCompactionJobComplete(FileSystemDataset dataset) throws IOException {
    if (configurator != null && configurator.isJobCreated()) {
        CompactionPathParser.CompactionParserResult result = new CompactionPathParser(state).parse(dataset);
        Path tmpPath = configurator.getMrOutputPath();
        Path dstPath = new Path(result.getDstAbsoluteDir());

        // this is append delta mode due to the compaction rename source dir mode being enabled
        boolean appendDeltaOutput = this.state.getPropAsBoolean(
                MRCompactor.COMPACTION_RENAME_SOURCE_DIR_ENABLED,
                MRCompactor.DEFAULT_COMPACTION_RENAME_SOURCE_DIR_ENABLED);

        Job job = this.configurator.getConfiguredJob();

        long newTotalRecords = 0;
        long oldTotalRecords = helper.readRecordCount(new Path(result.getDstAbsoluteDir()));
        long executeCount = helper.readExecutionCount(new Path(result.getDstAbsoluteDir()));

        List<Path> goodPaths = CompactionJobConfigurator.getGoodFiles(job, tmpPath, this.fs,
                ImmutableList.of(configurator.getFileExtension()));

        if (appendDeltaOutput) {
            FsPermission permission = HadoopUtils.deserializeFsPermission(this.state,
                    MRCompactorJobRunner.COMPACTION_JOB_OUTPUT_DIR_PERMISSION, FsPermission.getDefault());
            WriterUtils.mkdirsWithRecursivePermission(this.fs, dstPath, permission);
            // append files under mr output to destination
            for (Path filePath : goodPaths) {
                String fileName = filePath.getName();
                log.info(String.format("Adding %s to %s", filePath.toString(), dstPath));
                Path outPath = new Path(dstPath, fileName);

                if (!this.fs.rename(filePath, outPath)) {
                    throw new IOException(
                            String.format("Unable to move %s to %s", filePath.toString(), outPath.toString()));
                }
            }

            // Obtain record count from input file names.
            // We don't get the record count from the map-reduce counter because in the next run, the threshold (delta record)
            // calculation is based on the input file names. By pre-defining which input folders are involved in the
            // MR execution, it is easy to track how many files have been involved in MR so far, which makes it possible to
            // calculate the total number of records (all previous runs + the current run).
            newTotalRecords = this.configurator.getFileNameRecordCount();
        } else {
            this.fs.delete(dstPath, true);
            FsPermission permission = HadoopUtils.deserializeFsPermission(this.state,
                    MRCompactorJobRunner.COMPACTION_JOB_OUTPUT_DIR_PERMISSION, FsPermission.getDefault());

            WriterUtils.mkdirsWithRecursivePermission(this.fs, dstPath.getParent(), permission);
            if (!this.fs.rename(tmpPath, dstPath)) {
                throw new IOException(String.format("Unable to move %s to %s", tmpPath, dstPath));
            }

            // Obtain record count from map reduce job counter
            // We don't get the record count from file names because tracking which files are actually involved in the MR execution can
            // be hard. This is because new minutely data is rolled up into hourly folders, but from the daily compaction perspective we
            // are not able to tell which files are newly added (we simply pass all hourly folders to the MR job instead of individual files).
            Counter counter = job.getCounters().findCounter(RecordKeyMapperBase.EVENT_COUNTER.RECORD_COUNT);
            newTotalRecords = counter.getValue();
        }

        State compactState = helper.loadState(new Path(result.getDstAbsoluteDir()));
        compactState.setProp(CompactionSlaEventHelper.RECORD_COUNT_TOTAL, Long.toString(newTotalRecords));
        compactState.setProp(CompactionSlaEventHelper.EXEC_COUNT_TOTAL, Long.toString(executeCount + 1));
        compactState.setProp(CompactionSlaEventHelper.MR_JOB_ID,
                this.configurator.getConfiguredJob().getJobID().toString());
        helper.saveState(new Path(result.getDstAbsoluteDir()), compactState);

        log.info("Updating record count from {} to {} in {} [{}]", oldTotalRecords, newTotalRecords, dstPath,
                executeCount + 1);

        // submit events for record count
        if (eventSubmitter != null) {
            Map<String, String> eventMetadataMap = ImmutableMap.of(CompactionSlaEventHelper.DATASET_URN,
                    dataset.datasetURN(), CompactionSlaEventHelper.RECORD_COUNT_TOTAL,
                    Long.toString(newTotalRecords), CompactionSlaEventHelper.PREV_RECORD_COUNT_TOTAL,
                    Long.toString(oldTotalRecords), CompactionSlaEventHelper.EXEC_COUNT_TOTAL,
                    Long.toString(executeCount + 1), CompactionSlaEventHelper.MR_JOB_ID,
                    this.configurator.getConfiguredJob().getJobID().toString());
            this.eventSubmitter.submit(CompactionSlaEventHelper.COMPACTION_RECORD_COUNT_EVENT,
                    eventMetadataMap);
        }
    }
}

From source file: org.apache.gobblin.compaction.action.CompactionMarkDirectoryAction.java

License: Apache License

public void onCompactionJobComplete(FileSystemDataset dataset) throws IOException {
    boolean renamingRequired = this.state.getPropAsBoolean(MRCompactor.COMPACTION_RENAME_SOURCE_DIR_ENABLED,
            MRCompactor.DEFAULT_COMPACTION_RENAME_SOURCE_DIR_ENABLED);

    if (renamingRequired) {
        Collection<Path> paths = configurator.getMapReduceInputPaths();
        for (Path path : paths) {
            Path newPath = new Path(path.getParent(),
                    path.getName() + MRCompactor.COMPACTION_RENAME_SOURCE_DIR_SUFFIX);
            log.info("[{}] Renaming {} to {}", dataset.datasetURN(), path, newPath);
            fs.rename(path, newPath);
        }

        // submit events if directory is renamed
        if (eventSubmitter != null) {
            Map<String, String> eventMetadataMap = ImmutableMap.of(CompactionSlaEventHelper.DATASET_URN,
                    dataset.datasetURN(), CompactionSlaEventHelper.RENAME_DIR_PATHS,
                    Joiner.on(',').join(paths));
            this.eventSubmitter.submit(CompactionSlaEventHelper.COMPACTION_MARK_DIR_EVENT, eventMetadataMap);
        }
    }
}

From source file: org.apache.gobblin.compliance.restore.RestorableHivePartitionDataset.java

License: Apache License

public void restore() throws IOException {
    State state = new State(this.state);
    this.datasetOwnerFs = ProxyUtils.getOwnerFs(state, this.datasetOwner);
    try (HiveProxyQueryExecutor queryExecutor = ProxyUtils.getQueryExecutor(state, this.datasetOwner,
            this.datasetToRestoreOwner, this.trashOwner)) {
        if (this.state.getPropAsBoolean(ComplianceConfigurationKeys.COMPLIANCE_JOB_SIMULATE,
                ComplianceConfigurationKeys.DEFAULT_COMPLIANCE_JOB_SIMULATE)) {
            log.info("Simulating restore of " + datasetURN() + " with " + this.datasetToRestore.datasetURN());
            return;
        }

        Path trashPartitionLocation = getTrashPartitionLocation();
        executeTrashTableQueries(queryExecutor);
        this.datasetOwnerFs.mkdirs(trashPartitionLocation.getParent());
        this.datasetOwnerFs.rename(getLocation(), trashPartitionLocation);
        FsPermission permission = new FsPermission(FsAction.ALL, FsAction.ALL, FsAction.NONE);
        HadoopUtils.setPermissions(trashPartitionLocation.getParent(), this.datasetOwner, this.trashOwner,
                this.datasetOwnerFs, permission);
        log.info("Moved dataset " + datasetURN() + " from " + getLocation() + " to trash location "
                + trashPartitionLocation);
        fsMove(this.datasetToRestore.getLocation(), getLocation());
        HadoopUtils.setPermissions(getLocation().getParent(), this.datasetOwner, this.trashOwner,
                this.datasetOwnerFs, permission);
        log.info("Moved data from backup " + this.datasetToRestore.getLocation() + " to location "
                + getLocation());
        executeDropPartitionQueries(queryExecutor);
    }
}

From source file: org.apache.gobblin.compliance.retention.HivePartitionVersionRetentionReaper.java

License: Apache License

private void fsMove(Path from, Path to) throws IOException {
    if (PartitionUtils.isUnixTimeStamp(from.getName())) {
        this.versionOwnerFs.rename(from, to.getParent());
    } else {
        for (FileStatus fileStatus : this.versionOwnerFs.listStatus(from)) {
            if (fileStatus.isFile()) {
                this.versionOwnerFs.rename(fileStatus.getPath(), to);
            }
        }
    }
}