com.yahoo.glimmer.util.MapReducePartInputStreamEnumeration.java Source code

Java tutorial

Introduction

Here is the source code for com.yahoo.glimmer.util.MapReducePartInputStreamEnumeration.java

Source

package com.yahoo.glimmer.util;

/*
 * Copyright (c) 2012 Yahoo! Inc. All rights reserved.
 * 
 *  Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
 *  Unless required by applicable law or agreed to in writing, software distributed under the License is 
 *  distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and limitations under the License.
 *  See accompanying LICENSE file.
 */

import java.io.IOException;
import java.io.InputStream;
import java.util.Enumeration;

import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;

/**
 * For use with {@link java.io.SequenceInputStream} to read hadoop
 * dir/part-?-?????.. files as a single {@link java.io.InputStream}.
 * <p>
 * If the source path is a directory, the enumeration yields one stream per
 * file in that directory matching <code>part-?-?????</code> (plus the codec's
 * default extension when a compression codec was detected from the source
 * path). If the source path is not a directory, the enumeration yields a
 * single stream for that file. When a codec was detected, each stream is
 * wrapped with the codec's decompressing stream.
 * 
 * @author tep
 * 
 */
public class MapReducePartInputStreamEnumeration implements Enumeration<InputStream> {
    /** Glob matching the part files inside a source directory. */
    private static final String PART_FILENAME_PATTERN = "part-?-?????";

    private final FileSystem fileSystem;
    private final FileStatus[] partFileStatuses;
    /** Index of the next element of {@link #partFileStatuses} to hand out. */
    private int partFileStatusesIndex;
    /** Codec derived from the source path's name, or null when none matched. */
    private final CompressionCodec codecIfAny;

    /**
     * Resolves the part files to enumerate. No part file is opened here; files
     * are opened lazily by {@link #nextElement()}.
     * 
     * @param fileSystem
     *            file system used to stat, glob and later open the files.
     * @param srcPath
     *            either a directory containing part files, or a single file.
     *            The compression codec (if any) is looked up from this path.
     * @throws IOException
     *             if the status of srcPath can't be read or the glob fails.
     */
    public MapReducePartInputStreamEnumeration(FileSystem fileSystem, Path srcPath) throws IOException {
        this.fileSystem = fileSystem;

        CompressionCodecFactory factory = new CompressionCodecFactory(fileSystem.getConf());
        codecIfAny = factory.getCodec(srcPath);

        FileStatus srcFileStatus = fileSystem.getFileStatus(srcPath);
        if (srcFileStatus.isDirectory()) {
            String partFilenamePattern = PART_FILENAME_PATTERN;
            if (codecIfAny != null) {
                partFilenamePattern += codecIfAny.getDefaultExtension();
            }
            Path partPathGlob = new Path(srcPath, partFilenamePattern);
            // globStatus is documented to return its matches sorted by name,
            // which gives us the part files in order.
            FileStatus[] matches = fileSystem.globStatus(partPathGlob);
            // Be defensive: treat a null result the same as "no part files"
            // so hasMoreElements() can't fail with a NullPointerException.
            partFileStatuses = matches != null ? matches : new FileStatus[0];
        } else {
            partFileStatuses = new FileStatus[] { srcFileStatus };
        }
    }

    /**
     * @return true if there is at least one more part file to open.
     */
    @Override
    public boolean hasMoreElements() {
        return partFileStatusesIndex < partFileStatuses.length;
    }

    /**
     * Opens the next part file, wrapping it in a decompressing stream when a
     * codec was detected. The caller owns the returned stream;
     * {@link java.io.SequenceInputStream} closes each stream for us.
     * 
     * @return the stream for the next part file.
     * @throws java.util.NoSuchElementException
     *             if there are no more part files.
     * @throws RuntimeException
     *             wrapping the {@link IOException} if the part file can't be
     *             opened. The enumeration still advances past that file.
     */
    @Override
    public InputStream nextElement() {
        if (!hasMoreElements()) {
            // Honour the Enumeration contract rather than leaking an
            // ArrayIndexOutOfBoundsException (and corrupting the index).
            throw new java.util.NoSuchElementException("No more part files");
        }
        FileStatus partStatus = partFileStatuses[partFileStatusesIndex++];
        try {
            return openPart(partStatus.getPath());
        } catch (IOException e) {
            throw new RuntimeException("Failed to open part file " + partStatus.getPath(), e);
        }
    }

    /**
     * Rewinds the enumeration so the part files can be enumerated again from
     * the first one.
     */
    public void reset() {
        partFileStatusesIndex = 0;
    }

    /**
     * Strips the detected codec's default extension from the given filename
     * using {@link CompressionCodecFactory#removeSuffix(String, String)}.
     * 
     * @param filename
     *            the filename to strip.
     * @return the result of removing the codec's default extension, or the
     *         filename unchanged when no codec was detected.
     */
    public String removeCompressionSuffixIfAny(String filename) {
        if (codecIfAny != null) {
            return CompressionCodecFactory.removeSuffix(filename, codecIfAny.getDefaultExtension());
        } else {
            return filename;
        }
    }

    /** Opens one part file, closing the raw stream if the codec wrapper can't be created. */
    private InputStream openPart(Path partPath) throws IOException {
        InputStream rawStream = fileSystem.open(partPath);
        if (codecIfAny == null) {
            return rawStream;
        }
        boolean wrapped = false;
        try {
            InputStream decodedStream = codecIfAny.createInputStream(rawStream);
            wrapped = true;
            return decodedStream;
        } finally {
            if (!wrapped) {
                // Nobody else holds a reference to the raw stream at this
                // point, so it would leak if we didn't close it here.
                closeQuietly(rawStream);
            }
        }
    }

    /** Closes the stream, ignoring any failure so the original exception is the one reported. */
    private static void closeQuietly(InputStream stream) {
        try {
            stream.close();
        } catch (IOException ignored) {
            // Intentionally ignored: we are already propagating the failure
            // that made us close the stream.
        }
    }
}