io.druid.storage.hdfs.HdfsDataSegmentPusher.java Source code

Introduction

Here is the source code for io.druid.storage.hdfs.HdfsDataSegmentPusher.java, Druid's DataSegmentPusher implementation for HDFS deep storage. It compresses a locally built segment directory into an index.zip on HDFS and writes a descriptor.json next to it so the segment can be located and loaded later.

Source

/*
 * Druid - a distributed column store.
 * Copyright 2012 - 2015 Metamarkets Group Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package io.druid.storage.hdfs;

import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.collect.ImmutableMap;
import com.google.common.io.ByteSink;
import com.google.common.io.ByteSource;
import com.google.inject.Inject;
import com.metamx.common.CompressionUtils;
import com.metamx.common.logger.Logger;
import io.druid.segment.SegmentUtils;
import io.druid.segment.loading.DataSegmentPusher;
import io.druid.segment.loading.DataSegmentPusherUtil;
import io.druid.timeline.DataSegment;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

import java.io.File;
import java.io.IOException;
import java.io.OutputStream;

/**
 * Pushes finished segments to HDFS deep storage and writes the descriptor
 * metadata needed to load them back.
 */
public class HdfsDataSegmentPusher implements DataSegmentPusher {
    private static final Logger log = new Logger(HdfsDataSegmentPusher.class);

    private final HdfsDataSegmentPusherConfig config;
    private final Configuration hadoopConfig;
    private final ObjectMapper jsonMapper;

    @Inject
    public HdfsDataSegmentPusher(HdfsDataSegmentPusherConfig config, Configuration hadoopConfig,
            ObjectMapper jsonMapper) {
        this.config = config;
        this.hadoopConfig = hadoopConfig;
        this.jsonMapper = jsonMapper;

        log.info("Configured HDFS as deep storage");
    }

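    /**
     * Returns the configured storage directory as a URI string for Hadoop
     * indexing tasks to write under; the dataSource argument is not used.
     */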
    @Override
    public String getPathForHadoop(String dataSource) {
        return new Path(config.getStorageDirectory()).toUri().toString();
    }

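    /**
     * Zips the local segment directory into an index.zip under the segment's
     * HDFS storage path, writes a descriptor.json next to it, and returns the
     * segment with its load spec, size, and binary version filled in.
     */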
    @Override
    public DataSegment push(File inDir, DataSegment segment) throws IOException {
        final String storageDir = DataSegmentPusherUtil.getHdfsStorageDir(segment);

        log.info("Copying segment[%s] to HDFS at location[%s/%s]", segment.getIdentifier(),
                config.getStorageDirectory(), storageDir);

        Path outFile = new Path(String.format("%s/%s/index.zip", config.getStorageDirectory(), storageDir));
        FileSystem fs = outFile.getFileSystem(hadoopConfig);

        fs.mkdirs(outFile.getParent());
        log.info("Compressing files from[%s] to [%s]", inDir, outFile);

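        // Stream the zip archive directly into HDFS instead of staging it
        // locally; try-with-resources closes the stream even on failure.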
        final long size;
        try (FSDataOutputStream out = fs.create(outFile)) {
            size = CompressionUtils.zip(inDir, out);
        }

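        // Point the segment metadata at the uploaded zip, then persist it as
        // descriptor.json alongside index.zip.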
        return createDescriptorFile(segment.withLoadSpec(makeLoadSpec(outFile)).withSize(size)
                .withBinaryVersion(SegmentUtils.getVersionFromDir(inDir)), outFile.getParent(), fs);
    }

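    /**
     * Serializes the segment metadata as JSON into descriptor.json in outDir.
     */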
    private DataSegment createDescriptorFile(DataSegment segment, Path outDir, final FileSystem fs)
            throws IOException {
        final Path descriptorFile = new Path(outDir, "descriptor.json");
        log.info("Creating descriptor file at[%s]", descriptorFile);
        ByteSource.wrap(jsonMapper.writeValueAsBytes(segment))
                .copyTo(new HdfsOutputStreamSupplier(fs, descriptorFile));
        return segment;
    }

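    /**
     * Builds the "hdfs" load spec that records where index.zip was uploaded,
     * so segment loaders know where to fetch it from.
     */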
    private ImmutableMap<String, Object> makeLoadSpec(Path outFile) {
        return ImmutableMap.<String, Object>of("type", "hdfs", "path", outFile.toString());
    }

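    /**
     * A Guava ByteSink that opens an HDFS output stream for the descriptor
     * file whenever a caller (here, ByteSource.copyTo) requests one.
     */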
    private static class HdfsOutputStreamSupplier extends ByteSink {
        private final FileSystem fs;
        private final Path descriptorFile;

        public HdfsOutputStreamSupplier(FileSystem fs, Path descriptorFile) {
            this.fs = fs;
            this.descriptorFile = descriptorFile;
        }

        @Override
        public OutputStream openStream() throws IOException {
            return fs.create(descriptorFile);
        }
    }
}
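
Example usage

The sketch below shows how the pusher might be wired up by hand, outside of Druid's normal Guice injection. The namenode URL, the setStorageDirectory call on HdfsDataSegmentPusherConfig, the sample segment metadata, and the local working directory are all illustrative assumptions; in a real deployment the config is bound from the druid.storage.storageDirectory runtime property and the segment is built by an indexing task.

import java.io.File;

import org.apache.hadoop.conf.Configuration;
import org.joda.time.DateTime;
import org.joda.time.Interval;

import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;

import io.druid.storage.hdfs.HdfsDataSegmentPusher;
import io.druid.storage.hdfs.HdfsDataSegmentPusherConfig;
import io.druid.timeline.DataSegment;
import io.druid.timeline.partition.NoneShardSpec;

public class HdfsPushExample {
    public static void main(String[] args) throws Exception {
        // Hadoop reads fs.defaultFS and friends from core-site.xml on the classpath.
        Configuration hadoopConf = new Configuration();
        ObjectMapper jsonMapper = new ObjectMapper();

        // Assumption: the config exposes a settable storageDirectory; in Druid
        // it is populated from runtime properties instead.
        HdfsDataSegmentPusherConfig config = new HdfsDataSegmentPusherConfig();
        config.setStorageDirectory("hdfs://namenode:8020/druid/segments");

        HdfsDataSegmentPusher pusher = new HdfsDataSegmentPusher(config, hadoopConf, jsonMapper);

        // Hypothetical segment metadata. loadSpec, size, and binaryVersion are
        // placeholders here because push() replaces them with real values.
        DataSegment segment = DataSegment.builder()
                .dataSource("wikipedia")
                .interval(new Interval(new DateTime("2015-01-01"), new DateTime("2015-01-02")))
                .version("2015-01-03T00:00:00.000Z")
                .dimensions(ImmutableList.of("page", "language"))
                .metrics(ImmutableList.of("count"))
                .shardSpec(new NoneShardSpec())
                .loadSpec(ImmutableMap.<String, Object>of())
                .binaryVersion(9)
                .size(0)
                .build();

        // The working directory must already contain the built segment files
        // (version.bin, *.smoosh, ...), since push() zips and inspects them.
        DataSegment pushed = pusher.push(new File("/tmp/segment-working-dir"), segment);

        // The returned copy carries the new load spec:
        // {type=hdfs, path=<storageDirectory>/<segment path>/index.zip}
        System.out.println(pushed.getLoadSpec());
    }
}

Note that push() does not mutate the segment it is given: DataSegment is immutable, and withLoadSpec/withSize/withBinaryVersion each return a new copy, so the caller receives fresh metadata while the local input directory is left untouched.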