com.datatorrent.lib.io.fs.AbstractHdfsRollingFileOutputOperator.java Source code

Java tutorial

Introduction

Here is the source code for com.datatorrent.lib.io.fs.AbstractHdfsRollingFileOutputOperator.java

Source

/*
 * Copyright (c) 2014 DataTorrent, Inc. ALL Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.datatorrent.lib.io.fs;

import java.io.IOException;

import org.apache.hadoop.fs.Path;

/**
 * Adapter for writing to HDFS
 * <p>
 * Serializes tuples into an HDFS file<br>
 * Tuples are written to a single HDFS file or multiple HDFS files, with the option to specify size-based file rolling,
 * using placeholders in the file path pattern.<br>
 * Example file path pattern : file:///mydir/adviews.out.%(operatorId).part-%(partIndex). where operatorId and partIndex
 * are placeholders.
 * </p>
 *
 * @param <T> input port tuple type
 * @since 0.9.4
 */
public abstract class AbstractHdfsRollingFileOutputOperator<T> extends AbstractHdfsFileOutputOperator<T> {
    /**
     * Number of bytes written to the currently open file. Reset to 0 whenever this class opens a file; saturates at
     * {@link Integer#MAX_VALUE} instead of wrapping around.
     */
    protected int currentBytesWritten = 0;
    /**
     * Byte limit for a single file. Rolling is only performed when this value is greater than 0.
     */
    protected int bytesPerFile = 0;

    /**
     * Path most recently returned by {@link #nextFilePath()} and accepted by the uniqueness check.
     */
    protected transient Path currentFilePath;
    /**
     * This variable specifies if the operator needs to close the file at every end window
     */
    protected boolean closeCurrentFile;

    /**
     * @return the closeCurrentFile
     */
    public boolean isCloseCurrentFile() {
        return closeCurrentFile;
    }

    /**
     * @param closeCurrentFile
     * the closeCurrentFile to set
     */
    public void setCloseCurrentFile(boolean closeCurrentFile) {
        this.closeCurrentFile = closeCurrentFile;
    }

    /**
     * Byte limit for a single file. Once the size is reached, a new file will be created. Values less than or equal
     * to 0 disable size based rolling.
     *
     * @param bytesPerFile maximum number of bytes to write to one file
     */
    public void setBytesPerFile(int bytesPerFile) {
        this.bytesPerFile = bytesPerFile;
    }

    /**
     * Opens the next output file if no output stream is currently open.
     *
     * @param windowId id of the window that is starting (not used here)
     * @throws RuntimeException if opening the file fails with an {@link IOException}
     */
    @Override
    public void beginWindow(long windowId) {
        if (fsOutput == null) {
            try {
                openNextFile();
            } catch (IOException e) {
                throw new RuntimeException("Failed to open the file.", e);
            }
        }
    }

    /**
     * Closes the current file when {@code closeCurrentFile} is set; otherwise flushes whatever streams are open so the
     * data written during the window is pushed out.
     *
     * @throws RuntimeException if closing or flushing fails with an {@link IOException}
     */
    @Override
    public void endWindow() {
        try {
            if (closeCurrentFile) {
                closeFile();
            } else {
                if (bufferedOutput != null) {
                    bufferedOutput.flush();
                }
                // Guarded like bufferedOutput above: nothing to flush when no stream is open.
                if (fsOutput != null) {
                    fsOutput.hflush();
                }
            }
        } catch (IOException ex) {
            throw new RuntimeException("Failed to flush.", ex);
        }
    }

    /**
     * Writes the bytes of one tuple to the current file, rolling over to the next file first when the write would push
     * the current file past {@code bytesPerFile}.
     *
     * @param t tuple to write
     * @throws RuntimeException if opening, closing or writing fails with an {@link IOException}
     */
    @Override
    protected void processTuple(T t) {
        try {
            // checks if the stream is open. If not then open a stream
            if (fsOutput == null) {
                openNextFile();
            }
            byte[] tupleBytes = getBytesForTuple(t);
            // Sum in long: an int sum can wrap to a negative value for limits close to Integer.MAX_VALUE, which would
            // make the comparison below false forever and silently disable rolling.
            long sizeAfterWrite = (long) currentBytesWritten + tupleBytes.length;
            // checks for the rolling file
            if (bytesPerFile > 0 && sizeAfterWrite > bytesPerFile) {
                closeFile();
                openNextFile();
                sizeAfterWrite = tupleBytes.length;
            }
            if (bufferedOutput != null) {
                bufferedOutput.write(tupleBytes);
            } else {
                fsOutput.write(tupleBytes);
            }
            // Saturate rather than wrap so the counter never turns negative when rolling is disabled and the file
            // grows beyond what an int can hold.
            currentBytesWritten = (int) Math.min(sizeAfterWrite, Integer.MAX_VALUE);
            totalBytesWritten += tupleBytes.length;
        } catch (IOException ex) {
            throw new RuntimeException("Failed to write to stream.", ex);
        }
    }

    /**
     * Picks the next file path, opens it and resets the per-file byte counter.
     *
     * @throws IOException if the file cannot be opened
     */
    private void openNextFile() throws IOException {
        validateNextFilePath();
        openFile(currentFilePath);
        currentBytesWritten = 0;
    }

    /**
     * This checks if the new path is not same as old path. If it is then throw exception
     *
     * @throws IllegalArgumentException if {@link #nextFilePath()} returns the path that is already current
     */
    private void validateNextFilePath() {
        Path filepath = nextFilePath();
        if (currentFilePath != null && filepath.equals(currentFilePath)) {
            throw new IllegalArgumentException(
                    "Rolling files require %() placeholders for unique names: " + filepath);
        }
        currentFilePath = filepath;
    }

    /**
     * This function returns the path for the file output. If the implementing class wants to use single file, then it can
     * return the same path every time o/w different based on the use case.
     * <p>
     * Note that every time this operator opens a file after the first one (after a size based roll, or after the file
     * was closed at end window) the returned path is compared with the previous one and an
     * {@link IllegalArgumentException} is thrown if they are equal, so a constant path only works when neither of
     * those features is in use.
     * </p>
     *
     * @return the path of the next file to write to
     */
    public abstract Path nextFilePath();

}