Source code

Java tutorial


Here is the source code for org.apache.hadoop.yarn.logaggregation.LogAggregationUtils.java


/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.yarn.logaggregation;

import org.apache.hadoop.classification.InterfaceAudience.Private;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileContext;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.api.records.NodeId;
import org.apache.hadoop.yarn.conf.YarnConfiguration;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

public class LogAggregationUtils {

    public static final String TMP_FILE_SUFFIX = ".tmp";

    /**
     * Constructs the full filename for an application's log file per node.
     * @param remoteRootLogDir the aggregated remote root log dir
     * @param appId the application Id
     * @param user the application owner
     * @param nodeId the node id
     * @param suffix the log dir suffix
     * @return the remote log file.
     */
    public static Path getRemoteNodeLogFileForApp(Path remoteRootLogDir, ApplicationId appId, String user,
            NodeId nodeId, String suffix) {
        return new Path(getRemoteAppLogDir(remoteRootLogDir, appId, user, suffix), getNodeString(nodeId));

    /**
     * Gets the remote app log dir.
     * @param remoteRootLogDir the aggregated log remote root log dir
     * @param appId the application id
     * @param user the application owner
     * @param suffix the log directory suffix
     * @return the remote application specific log dir.
     */
    public static Path getRemoteAppLogDir(Path remoteRootLogDir, ApplicationId appId, String user, String suffix) {
        return new Path(getRemoteLogSuffixedDir(remoteRootLogDir, user, suffix), appId.toString());

    /**
     * Gets the remote suffixed log dir for the user.
     * @param remoteRootLogDir the aggregated log remote root log dir
     * @param user the application owner
     * @param suffix the log dir suffix
     * @return the remote suffixed log dir.
     */
    public static Path getRemoteLogSuffixedDir(Path remoteRootLogDir, String user, String suffix) {
        if (suffix == null || suffix.isEmpty()) {
            return getRemoteLogUserDir(remoteRootLogDir, user);
        // TODO Maybe support suffix to be more than a single file.
        return new Path(getRemoteLogUserDir(remoteRootLogDir, user), suffix);

    /**
     * Gets the remote log user dir.
     * @param remoteRootLogDir the aggregated log remote root log dir
     * @param user the application owner
     * @return the remote per user log dir.
     */
    public static Path getRemoteLogUserDir(Path remoteRootLogDir, String user) {
        return new Path(remoteRootLogDir, user);

    /**
     * Returns the suffix component of the log dir.
     * @param conf the configuration
     * @return the suffix which will be appended to the user log dir.
     */
    public static String getRemoteNodeLogDirSuffix(Configuration conf) {
        return conf.get(YarnConfiguration.NM_REMOTE_APP_LOG_DIR_SUFFIX,

    /**
     * Converts a nodeId to a form used in the app log file name.
     * @param nodeId the nodeId
     * @return the node string to be used to construct the file name.
     */
    public static String getNodeString(NodeId nodeId) {
        return nodeId.toString().replace(":", "_");

    public static String getNodeString(String nodeId) {
        return nodeId.toString().replace(":", "_");

    /**
     * Return the remote application log directory.
     * @param conf the configuration
     * @param appId the application
     * @param appOwner the application owner
     * @return the remote application log directory path
     * @throws IOException if we can not find remote application log directory
     */
    public static org.apache.hadoop.fs.Path getRemoteAppLogDir(Configuration conf, ApplicationId appId,
            String appOwner) throws IOException {
        String suffix = LogAggregationUtils.getRemoteNodeLogDirSuffix(conf);
        org.apache.hadoop.fs.Path remoteRootLogDir = new org.apache.hadoop.fs.Path(
                conf.get(YarnConfiguration.NM_REMOTE_APP_LOG_DIR, YarnConfiguration.DEFAULT_NM_REMOTE_APP_LOG_DIR));
        return getRemoteAppLogDir(conf, appId, appOwner, remoteRootLogDir, suffix);

    /**
     * Return the remote application log directory.
     * @param conf the configuration
     * @param appId the application
     * @param appOwner the application owner
     * @param remoteRootLogDir the remote root log directory
     * @param suffix the log directory suffix
     * @return the remote application log directory path
     * @throws IOException if we can not find remote application log directory
     */
    public static org.apache.hadoop.fs.Path getRemoteAppLogDir(Configuration conf, ApplicationId appId,
            String appOwner, org.apache.hadoop.fs.Path remoteRootLogDir, String suffix) throws IOException {
        org.apache.hadoop.fs.Path remoteAppDir = null;
        if (appOwner == null) {
            org.apache.hadoop.fs.Path qualifiedRemoteRootLogDir = FileContext.getFileContext(conf)
            FileContext fc = FileContext.getFileContext(qualifiedRemoteRootLogDir.toUri(), conf);
            org.apache.hadoop.fs.Path toMatch = LogAggregationUtils.getRemoteAppLogDir(remoteRootLogDir, appId, "*",
            FileStatus[] matching = fc.util().globStatus(toMatch);
            if (matching == null || matching.length != 1) {
                throw new IOException(
                        "Can not find remote application directory for " + "the application:" + appId);
            remoteAppDir = matching[0].getPath();
        } else {
            remoteAppDir = LogAggregationUtils.getRemoteAppLogDir(remoteRootLogDir, appId, appOwner, suffix);
        return remoteAppDir;

    /**
     * Get all available log files under remote app log directory.
     * @param conf the configuration
     * @param appId the applicationId
     * @param appOwner the application owner
     * @param remoteRootLogDir the remote root log directory
     * @param suffix the log directory suffix
     * @return the iterator of available log files
     * @throws IOException if there is no log file available
     */
    public static RemoteIterator<FileStatus> getRemoteNodeFileDir(Configuration conf, ApplicationId appId,
            String appOwner, org.apache.hadoop.fs.Path remoteRootLogDir, String suffix) throws IOException {
        Path remoteAppLogDir = getRemoteAppLogDir(conf, appId, appOwner, remoteRootLogDir, suffix);
        RemoteIterator<FileStatus> nodeFiles = null;
        Path qualifiedLogDir = FileContext.getFileContext(conf).makeQualified(remoteAppLogDir);
        nodeFiles = FileContext.getFileContext(qualifiedLogDir.toUri(), conf).listStatus(remoteAppLogDir);
        return nodeFiles;

    /**
     * Get all available log files under remote app log directory.
     * @param conf the configuration
     * @param appId the applicationId
     * @param appOwner the application owner
     * @param remoteRootLogDir the remote root log directory
     * @param suffix the log directory suffix
     * @return the list of available log files
     * @throws IOException if there is no log file available
     */
    public static List<FileStatus> getRemoteNodeFileList(Configuration conf, ApplicationId appId, String appOwner,
            org.apache.hadoop.fs.Path remoteRootLogDir, String suffix) throws IOException {
        Path remoteAppLogDir = getRemoteAppLogDir(conf, appId, appOwner, remoteRootLogDir, suffix);
        List<FileStatus> nodeFiles = new ArrayList<>();
        Path qualifiedLogDir = FileContext.getFileContext(conf).makeQualified(remoteAppLogDir);
                FileContext.getFileContext(qualifiedLogDir.toUri(), conf).util().listStatus(remoteAppLogDir)));
        return nodeFiles;

    /**
     * Get all available log files under remote app log directory.
     * @param conf the configuration
     * @param appId the applicationId
     * @param appOwner the application owner
     * @return the iterator of available log files
     * @throws IOException if there is no log file available
     */
    public static RemoteIterator<FileStatus> getRemoteNodeFileDir(Configuration conf, ApplicationId appId,
            String appOwner) throws IOException {
        Path remoteAppLogDir = getRemoteAppLogDir(conf, appId, appOwner);
        RemoteIterator<FileStatus> nodeFiles = null;
        Path qualifiedLogDir = FileContext.getFileContext(conf).makeQualified(remoteAppLogDir);
        nodeFiles = FileContext.getFileContext(qualifiedLogDir.toUri(), conf).listStatus(remoteAppLogDir);
        return nodeFiles;