org.apache.druid.storage.hdfs.tasklog.HdfsTaskLogs.java Source code

Introduction

Here is the source code for org.apache.druid.storage.hdfs.tasklog.HdfsTaskLogs.java. This class is Apache Druid's TaskLogs implementation backed by HDFS: it pushes task log and report files into a configured HDFS directory, streams them back from an optional offset, and deletes them either wholesale or by age.

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.druid.storage.hdfs.tasklog;

import com.google.common.base.Optional;
import com.google.common.io.ByteSource;
import com.google.common.io.ByteStreams;
import com.google.inject.Inject;
import org.apache.druid.java.util.common.IOE;
import org.apache.druid.java.util.common.logger.Logger;
import org.apache.druid.tasklogs.TaskLogs;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;

/**
 * Indexer HDFS task logs, to support storing indexing task log files in HDFS.
 */
public class HdfsTaskLogs implements TaskLogs {
    private static final Logger log = new Logger(HdfsTaskLogs.class);

    private final HdfsTaskLogsConfig config;
    private final Configuration hadoopConfig;

    @Inject
    public HdfsTaskLogs(HdfsTaskLogsConfig config, Configuration hadoopConfig) {
        this.config = config;
        this.hadoopConfig = hadoopConfig;
    }

    @Override
    public void pushTaskLog(String taskId, File logFile) throws IOException {
        final Path path = getTaskLogFileFromId(taskId);
        log.info("Writing task log to: %s", path);
        pushTaskFile(path, logFile);
        log.info("Wrote task log to: %s", path);
    }

    @Override
    public void pushTaskReports(String taskId, File reportFile) throws IOException {
        final Path path = getTaskReportsFileFromId(taskId);
        log.info("Writing task reports to: %s", path);
        pushTaskFile(path, reportFile);
        log.info("Wrote task reports to: %s", path);
    }

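    /**
     * Copies the local file to the target HDFS path, overwriting any file
     * already at that path.
     */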
    private void pushTaskFile(Path path, File logFile) throws IOException {
        final FileSystem fs = path.getFileSystem(hadoopConfig);
        try (final InputStream in = new FileInputStream(logFile); final OutputStream out = fs.create(path, true)) {
            ByteStreams.copy(in, out);
        }
    }

    @Override
    public Optional<ByteSource> streamTaskLog(final String taskId, final long offset) throws IOException {
        final Path path = getTaskLogFileFromId(taskId);
        return streamTaskFile(path, offset);
    }

    @Override
    public Optional<ByteSource> streamTaskReports(String taskId) throws IOException {
        final Path path = getTaskReportsFileFromId(taskId);
        return streamTaskFile(path, 0);
    }

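    /**
     * Streams the file at the given path, if it exists. A negative offset is
     * interpreted relative to the end of the file, so e.g. offset = -1000
     * streams the final 1,000 bytes.
     */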
    private Optional<ByteSource> streamTaskFile(final Path path, final long offset) throws IOException {
        final FileSystem fs = path.getFileSystem(hadoopConfig);
        if (fs.exists(path)) {
            return Optional.of(new ByteSource() {
                @Override
                public InputStream openStream() throws IOException {
                    log.info("Reading task log from: %s", path);
                    final long seekPos;
                    if (offset < 0) {
                        final FileStatus stat = fs.getFileStatus(path);
                        seekPos = Math.max(0, stat.getLen() + offset);
                    } else {
                        seekPos = offset;
                    }
                    final FSDataInputStream inputStream = fs.open(path);
                    inputStream.seek(seekPos);
                    log.info("Read task log from: %s (seek = %,d)", path, seekPos);
                    return inputStream;
                }
            });
        } else {
            return Optional.absent();
        }
    }

    /**
     * Due to https://issues.apache.org/jira/browse/HDFS-13, ':' is not allowed in
     * path names, so we format paths differently for HDFS.
     */
    private Path getTaskLogFileFromId(String taskId) {
        return new Path(mergePaths(config.getDirectory(), taskId.replace(':', '_')));
    }

    /**
     * Due to https://issues.apache.org/jira/browse/HDFS-13, ':' is not allowed in
     * path names, so we format paths differently for HDFS.
     */
    private Path getTaskReportsFileFromId(String taskId) {
        return new Path(mergePaths(config.getDirectory(), taskId.replace(':', '_') + ".reports.json"));
    }

    // Path.mergePaths does not exist in some Hadoop versions, so we merge paths manually.
    private static String mergePaths(String path1, String path2) {
        return path1 + (path1.endsWith(Path.SEPARATOR) ? "" : Path.SEPARATOR) + path2;
    }

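    /**
     * Recursively deletes the configured task log directory and everything in it.
     */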
    @Override
    public void killAll() throws IOException {
        log.info("Deleting all task logs from hdfs dir [%s].", config.getDirectory());
        Path taskLogDir = new Path(config.getDirectory());
        FileSystem fs = taskLogDir.getFileSystem(hadoopConfig);
        fs.delete(taskLogDir, true);
    }

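    /**
     * Deletes every entry in the task log directory whose modification time is
     * older than the given timestamp (milliseconds since the epoch), checking
     * for thread interruption between deletes.
     */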
    @Override
    public void killOlderThan(long timestamp) throws IOException {
        Path taskLogDir = new Path(config.getDirectory());
        FileSystem fs = taskLogDir.getFileSystem(hadoopConfig);
        if (fs.exists(taskLogDir)) {

            if (!fs.isDirectory(taskLogDir)) {
                throw new IOE("taskLogDir [%s] must be a directory.", taskLogDir);
            }

            RemoteIterator<LocatedFileStatus> iter = fs.listLocatedStatus(taskLogDir);
            while (iter.hasNext()) {
                LocatedFileStatus file = iter.next();
                if (file.getModificationTime() < timestamp) {
                    Path p = file.getPath();
                    log.info("Deleting hdfs task log [%s].", p.toUri().toString());
                    fs.delete(p, true);
                }

                if (Thread.currentThread().isInterrupted()) {
                    throw new IOException(
                            new InterruptedException("Thread interrupted. Couldn't delete all tasklogs."));
                }
            }
        }
    }
}
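
Example usage

For context, here is a minimal sketch of how HdfsTaskLogs might be exercised directly, outside of Guice injection. The single-argument HdfsTaskLogsConfig constructor, the directory value, and the task id are assumptions for illustration; in a Druid deployment the config is normally bound from the druid.indexer.logs.* runtime properties instead of constructed by hand.

import com.google.common.base.Optional;
import com.google.common.io.ByteSource;
import org.apache.hadoop.conf.Configuration;

import java.io.File;
import java.io.IOException;
import java.nio.charset.StandardCharsets;

public class HdfsTaskLogsExample {
    public static void main(String[] args) throws IOException {
        // Assumed constructor: HdfsTaskLogsConfig(String directory).
        HdfsTaskLogsConfig config = new HdfsTaskLogsConfig("/druid/indexing-logs");
        HdfsTaskLogs taskLogs = new HdfsTaskLogs(config, new Configuration());

        // Hypothetical task id; any ':' characters are replaced with '_'
        // when the HDFS path is built.
        String taskId = "index_demo_2024-01-01T00:00:00.000Z";
        taskLogs.pushTaskLog(taskId, new File("/tmp/task.log"));

        // A negative offset streams from the end of the file, so this
        // reads the final 1,000 bytes of the log.
        Optional<ByteSource> tail = taskLogs.streamTaskLog(taskId, -1000);
        if (tail.isPresent()) {
            System.out.println(new String(tail.get().read(), StandardCharsets.UTF_8));
        }
    }
}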