com.inmobi.conduit.distcp.tools.mapred.lib.DynamicInputChunkSet.java Source code

Java tutorial

Introduction

Here is the source code for com.inmobi.conduit.distcp.tools.mapred.lib.DynamicInputChunkSet.java

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.inmobi.conduit.distcp.tools.mapred.lib;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import com.inmobi.conduit.distcp.tools.DistCpConstants;
import com.inmobi.conduit.distcp.tools.util.HadoopCompat;

import java.io.IOException;

/**
 * The DynamicInputChunkSet abstracts the context in which a DynamicInputChunk
 * is constructed, acquired or released.
 * There is one instance of DynamicInputChunkSet for each unique Hadoop job
 * that uses the DynamicInputFormat.
 */
public class DynamicInputChunkSet {
    private static Log LOG = LogFactory.getLog(DynamicInputChunkSet.class);

    private Configuration configuration;
    private Path chunkRootPath;
    private String chunkFilePrefix;
    private int numChunksLeft = -1; // Un-initialized before 1st dir-scan.
    private FileSystem fs;

    /**
     * Constructor, to initialize the context in which DynamicInputChunks are
     * used.
     * @param configuration The Configuration instance, as received from the
     * DynamicInputFormat or DynamicRecordReader.
     * @throws IOException Exception in case of failure.
     */
    public DynamicInputChunkSet(Configuration configuration) throws IOException {
        this.configuration = configuration;
        Path listingFilePath = new Path(getListingFilePath(configuration));
        chunkRootPath = new Path(listingFilePath.getParent(), "chunkDir");
        fs = chunkRootPath.getFileSystem(configuration);
        chunkFilePrefix = listingFilePath.getName() + ".chunk.";
    }

    /**
     * Getter for the Configuration with which the DynamicInputChunkSet was
     * constructed.
     * @return Configuration object.
     */
    public Configuration getConf() {
        return configuration;
    }

    /**
     * The root-path of the directory where DynamicInputChunks are stored.
     * @return The chunk-directory location.
     */
    public Path getChunkRootPath() {
        return chunkRootPath;
    }

    /**
     * The string with which all chunk-file-names are prefixed.
     * @return Prefix string, for all chunk files.
     */
    public String getChunkFilePrefix() {
        return chunkFilePrefix;
    }

    /**
     * Number of chunk-files left, on last directory scan.
     * @return If chunk-directory hasn't been scanned yet, -1. Otherwise, the
     * number of chunk-files left.
     */
    public int getNumChunksLeft() {
        return numChunksLeft;
    }

    /**
     * FileSystem instance, for the file-system where the chunk-files are stored.
     * @return FileSystem instance.
     */
    public FileSystem getFileSystem() {
        return fs;
    }

    private static String getListingFilePath(Configuration configuration) {
        final String listingFileString = configuration.get(DistCpConstants.CONF_LABEL_LISTING_FILE_PATH, "");
        assert !listingFileString.equals("") : "Listing file not found.";
        return listingFileString;
    }

    /**
     * Factory method to create chunk-files for writing to.
     * (For instance, when the DynamicInputFormat splits the input-file into
     * chunks.)
     * @param chunkId String to identify the chunk.
     * @return A DynamicInputChunk, corresponding to a chunk-file, with the name
     * incorporating the chunk-id.
     * @throws IOException Exception on failure to create the chunk.
     */
    public DynamicInputChunk createChunkForWrite(String chunkId) throws IOException {
        return new DynamicInputChunk(chunkId, this);
    }

    /**
     * Factory method that
     * 1. acquires a chunk for the specified map-task attempt
     * 2. returns a DynamicInputChunk associated with the acquired chunk-file.
     * @param taskAttemptContext The attempt-context for the map task that's
     * trying to acquire a chunk.
     * @return The acquired dynamic-chunk. The chunk-file is renamed to the
     * attempt-id (from the attempt-context.)
     * @throws IOException Exception on failure.
     * @throws InterruptedException Exception on failure.
     */
    public DynamicInputChunk acquire(TaskAttemptContext taskAttemptContext)
            throws IOException, InterruptedException {

        String taskId = HadoopCompat.getTaskAttemptID(taskAttemptContext).getTaskID().toString();
        Path acquiredFilePath = new Path(chunkRootPath, taskId);

        if (fs.exists(acquiredFilePath)) {
            LOG.info("Acquiring pre-assigned chunk: " + acquiredFilePath);
            return new DynamicInputChunk(acquiredFilePath, taskAttemptContext);
        }

        for (FileStatus chunkFile : getListOfChunkFiles()) {
            if (fs.rename(chunkFile.getPath(), acquiredFilePath)) {
                LOG.info(taskId + " acquired " + chunkFile.getPath());
                return new DynamicInputChunk(acquiredFilePath, taskAttemptContext);
            } else
                LOG.warn(taskId + " could not acquire " + chunkFile.getPath());
        }

        return null;
    }

    FileStatus[] getListOfChunkFiles() throws IOException {
        Path chunkFilePattern = new Path(chunkRootPath, chunkFilePrefix + "*");
        FileStatus chunkFiles[] = fs.globStatus(chunkFilePattern);
        numChunksLeft = chunkFiles.length;
        return chunkFiles;
    }
}