io.druid.storage.hdfs.tasklog.HdfsTaskLogs.java Source code

Introduction

Here is the source code for io.druid.storage.hdfs.tasklog.HdfsTaskLogs.java, Druid's TaskLogs implementation that pushes indexer task log files to HDFS and streams them back on request.

Source

/*
 * Druid - a distributed column store.
 * Copyright 2012 - 2015 Metamarkets Group Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package io.druid.storage.hdfs.tasklog;

import com.google.common.base.Optional;
import com.google.common.io.ByteSource;
import com.google.inject.Inject;
import com.metamx.common.logger.Logger;
import io.druid.tasklogs.TaskLogs;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;

/**
 * Indexer HDFS task logs, to support storing task logs in HDFS.
 */
public class HdfsTaskLogs implements TaskLogs {
    private static final Logger log = new Logger(HdfsTaskLogs.class);

    private final HdfsTaskLogsConfig config;
    private final Configuration hadoopConfig;

    @Inject
    public HdfsTaskLogs(HdfsTaskLogsConfig config, Configuration hadoopConfig) {
        this.config = config;
        this.hadoopConfig = hadoopConfig;
    }

    @Override
    public void pushTaskLog(String taskId, File logFile) throws IOException {
        final Path path = getTaskLogFileFromId(taskId);
        log.info("Writing task log to: %s", path);
        final FileSystem fs = path.getFileSystem(hadoopConfig);
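        // The "false" argument to FileUtil.copy means the local log file is kept after the copy.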
        FileUtil.copy(logFile, fs, path, false, hadoopConfig);
        log.info("Wrote task log to: %s", path);
    }

    @Override
    public Optional<ByteSource> streamTaskLog(final String taskId, final long offset) throws IOException {
        final Path path = getTaskLogFileFromId(taskId);
        final FileSystem fs = path.getFileSystem(hadoopConfig);
        if (fs.exists(path)) {
            return Optional.<ByteSource>of(new ByteSource() {
                @Override
                public InputStream openStream() throws IOException {
                    log.info("Reading task log from: %s", path);
                    final long seekPos;
                    if (offset < 0) {
                        final FileStatus stat = fs.getFileStatus(path);
                        seekPos = Math.max(0, stat.getLen() + offset);
                    } else {
                        seekPos = offset;
                    }
                    final FSDataInputStream inputStream = fs.open(path);
                    inputStream.seek(seekPos);
                    log.info("Read task log from: %s (seek = %,d)", path, seekPos);
                    return inputStream;
                }
            });
        } else {
            return Optional.absent();
        }
    }

    /**
     * Due to https://issues.apache.org/jira/browse/HDFS-13, ":" is not allowed in
     * path names, so we format paths differently for HDFS.
     */
    private Path getTaskLogFileFromId(String taskId) {
        return new Path(mergePaths(config.getDirectory(), taskId.replaceAll(":", "_")));
    }

    // Path.mergePaths does not exist in some Hadoop versions, so merge manually
    private static String mergePaths(String path1, String path2) {
        return path1 + (path1.endsWith(Path.SEPARATOR) ? "" : Path.SEPARATOR) + path2;
    }
}
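
Example usage

The sketch below is a minimal, illustrative way to exercise this class outside of Guice injection. It assumes HdfsTaskLogsConfig can be constructed from the target HDFS directory string (that class is not shown above), and the file paths are placeholders.

import com.google.common.base.Optional;
import com.google.common.io.ByteSource;
import org.apache.hadoop.conf.Configuration;

import java.io.File;
import java.io.IOException;

public class HdfsTaskLogsExample {
    public static void main(String[] args) throws IOException {
        // Assumption: HdfsTaskLogsConfig takes the log directory; adjust to the real config class.
        HdfsTaskLogsConfig config = new HdfsTaskLogsConfig("/druid/indexing-logs");
        HdfsTaskLogs taskLogs = new HdfsTaskLogs(config, new Configuration());

        // Push a local log file; the ":" in the task id becomes "_" in the HDFS path.
        taskLogs.pushTaskLog("index_realtime:2015", new File("/tmp/task.log"));

        // A negative offset streams the tail of the log: here, the last 1024 bytes.
        Optional<ByteSource> logStream = taskLogs.streamTaskLog("index_realtime:2015", -1024);
        if (logStream.isPresent()) {
            System.out.println(new String(logStream.get().read()));
        }
    }
}

Because streamTaskLog returns a Guava ByteSource rather than a raw stream, callers can re-open the log repeatedly; each openStream() call opens a fresh FSDataInputStream and seeks to the computed position independently.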