org.kepler.ddp.actor.pattern.DDPDataSink.java Source code

Java tutorial

Introduction

Here is the source code for org.kepler.ddp.actor.pattern.DDPDataSink.java

Source

/* Data output for DDP.
 * 
 * Copyright (c) 2011-2012 The Regents of the University of California.
 * All rights reserved.
 *
 * '$Author: crawl $'
 * '$Date: 2014-11-12 14:42:30 -0800 (Wed, 12 Nov 2014) $' 
 * '$Revision: 33062 $'
 * 
 * Permission is hereby granted, without written agreement and without
 * license or royalty fees, to use, copy, modify, and distribute this
 * software and its documentation for any purpose, provided that the above
 * copyright notice and the following two paragraphs appear in all copies
 * of this software.
 *
 * IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY
 * FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF
 * THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * THE UNIVERSITY OF CALIFORNIA SPECIFICALLY DISCLAIMS ANY WARRANTIES,
 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE
 * PROVIDED HEREUNDER IS ON AN "AS IS" BASIS, AND THE UNIVERSITY OF
 * CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES,
 * ENHANCEMENTS, OR MODIFICATIONS.
 *
 */
package org.kepler.ddp.actor.pattern;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;
import org.kepler.configuration.ConfigurationProperty;

import ptolemy.actor.TypedIOPort;
import ptolemy.data.ArrayToken;
import ptolemy.data.BooleanToken;
import ptolemy.data.StringToken;
import ptolemy.data.Token;
import ptolemy.data.expr.Parameter;
import ptolemy.data.type.ArrayType;
import ptolemy.data.type.BaseType;
import ptolemy.data.type.Type;
import ptolemy.kernel.CompositeEntity;
import ptolemy.kernel.util.IllegalActionException;
import ptolemy.kernel.util.NameDuplicationException;

/** This actor writes data to the storage system for a DDP workflow. 
 *  The actor reads a set of key-value pairs and combines them based
 *  on the data format specified in formatType.
 * 
 *  @author Daniel Crawl
 *  @version $Id: DDPDataSink.java 33062 2014-11-12 22:42:30Z crawl $
 */
public class DDPDataSink extends AtomicPathActor {

    /** Construct a new DDPDataSink in a container with a given name.
     *  The constructor creates the <i>in</i> and <i>out</i> ports,
     *  initializes the key and value types to GENERAL, registers the
     *  "Output" formats, selects LineOutputFormat as the default format,
     *  and creates the boolean <i>mergeMultiPartOutputs</i> parameter
     *  with a default value of true.
     *
     *  @param container The container of this actor.
     *  @param name The name of this actor.
     *  @exception IllegalActionException If thrown by the superclass
     *   constructor or while creating the ports and parameters.
     *  @exception NameDuplicationException If thrown by the superclass
     *   constructor or while creating the ports and parameters.
     */
    public DDPDataSink(CompositeEntity container, String name)
            throws IllegalActionException, NameDuplicationException {
        super(container, name);

        // input-only port that receives the data to be written
        in = new TypedIOPort(this, "in", true, false);

        // set the types for the in port
        _keyType = BaseType.GENERAL;
        _valueType = BaseType.GENERAL;

        // output-only multiport
        out = new TypedIOPort(this, "out", false, true);
        out.setMultiport(true);

        // add the output formats in the config file as choices
        _addFormats("Output");

        // NOTE(review): assigned before _setFormat(); presumably the parent
        // class reads this category when resolving a format name - confirm.
        _FORMAT_TYPE_CATEGORY = "OutputFormats.Format";

        // set the default format
        _setFormat("LineOutputFormat");

        // merging of multi-part outputs is enabled by default
        mergeMultiPartOutputs = new Parameter(this, "mergeMultiPartOutputs");
        mergeMultiPartOutputs.setTypeEquals(BaseType.BOOLEAN);
        mergeMultiPartOutputs.setToken(BooleanToken.TRUE);
    }

    /** Append a list of tokens to those stored for a specific DDPDataSink
     *  actor. If no tokens are stored yet for the actor, a new list is
     *  created first.
     *
     *  <p>The lookup, the creation of a missing list, and the append are
     *  performed as one atomic step with respect to other accesses of the
     *  token map, so that concurrent callers cannot overwrite each
     *  other's list and tokens cannot be appended to a list that fire()
     *  has already removed from the map.
     *
     *  @param sinkActorName The key identifying the sink actor; fire()
     *   and wrapup() look tokens up using the actor's full name.
     *  @param tokenList The tokens to append.
     *  @exception IllegalActionException Not thrown by this
     *   implementation; declared so that existing callers keep compiling.
     */
    public static void addTokens(String sinkActorName, List<Token> tokenList) throws IllegalActionException {
        // The map returned by Collections.synchronizedMap() uses itself as
        // its mutex, so holding that lock makes this compound
        // get/put/addAll atomic relative to the map's own get/put/remove.
        synchronized (_tokenMap) {
            List<Token> tokens = _tokenMap.get(sinkActorName);
            if (tokens == null) {
                tokens = new ArrayList<Token>();
                _tokenMap.put(sinkActorName, tokens);
            }
            tokens.addAll(tokenList);
        }
    }

    /** Run the parent class's prefire() and then verify that a path has
     *  been given whenever the selected format needs one.
     *
     *  @return The value returned by the parent class's prefire().
     *  @exception IllegalActionException If thrown by the parent class,
     *   or if the format is neither TokenOutputFormat nor
     *   NullOutputFormat and the path is empty after trimming.
     */
    @Override
    public boolean prefire() throws IllegalActionException {

        final boolean result = super.prefire();

        // these two formats do not use the path
        final boolean pathNotUsed = _formatTypeStr.equals("TokenOutputFormat")
                || _formatTypeStr.equals("NullOutputFormat");

        if (!pathNotUsed) {
            // make sure it's not empty
            final String pathValue = ((StringToken) path.getToken()).stringValue();
            if (pathValue.trim().isEmpty()) {
                throw new IllegalActionException(this, "Path must not be empty.");
            }
        }

        return result;
    }

    /** Send the result of the write to the out port. What is sent
     *  depends on the selected format:
     *  <ul>
     *  <li>TokenOutputFormat: an array of the tokens stored for this
     *  actor, which are removed from the token map;</li>
     *  <li>NullOutputFormat: the string "done";</li>
     *  <li>any other format: the token in the path parameter, after
     *  optionally merging multi-part outputs.</li>
     *  </ul>
     *
     *  @exception IllegalActionException If TokenOutputFormat is
     *   selected and no tokens were stored for this actor, or if
     *   reading a parameter, merging, or broadcasting fails.
     */
    @Override
    public void fire() throws IllegalActionException {

        if (_formatTypeStr.equals("TokenOutputFormat")) {
            // take the tokens out of the map so that they are not
            // seen again by the next fire().
            final List<Token> stored = _tokenMap.remove(getFullName());
            if (stored == null || stored.isEmpty()) {
                throw new IllegalActionException(this, "No tokens were written.");
            }
            out.broadcast(new ArrayToken(stored.toArray(new Token[stored.size()])));
            return;
        }

        if (_formatTypeStr.equals("NullOutputFormat")) {
            out.broadcast(new StringToken("done"));
            return;
        }

        // all remaining formats write to the location named by path
        final boolean merge = ((BooleanToken) mergeMultiPartOutputs.getToken()).booleanValue();
        if (merge) {
            final String pathStr = ((StringToken) path.getToken()).stringValue();
            _mergeMultiPartOutputs(pathStr);
        }

        out.broadcast(path.getToken());
    }

    /** Check that a token-based or null output format is not combined
     *  with a path. The path counts as used if the path port has a
     *  source connected or the path parameter holds a non-empty string.
     *
     *  @exception IllegalActionException If thrown by the parent class,
     *   or if TokenOutputFormat or NullOutputFormat is selected while
     *   the path port or parameter is also in use.
     */
    @Override
    public void preinitialize() throws IllegalActionException {

        super.preinitialize();

        final boolean connected = path.getPort().numberOfSources() > 0;
        final Token token = path.getToken();

        final boolean pathlessFormat = _formatTypeStr.equals("TokenOutputFormat")
                || _formatTypeStr.equals("NullOutputFormat");

        if (pathlessFormat) {
            // the parameter value is only inspected when the port is
            // not connected
            boolean pathInUse = connected;
            if (!pathInUse && token != null) {
                pathInUse = !((StringToken) token).stringValue().isEmpty();
            }

            if (pathInUse) {
                throw new IllegalActionException(this,
                        "TokenOutputFormat or NullOutputFormat and the path port/parameter cannot be used at the same time.\n"
                                + "Either change the output format, or disconnect the path port and clear the\n"
                                + "path parameter.");
            }
        }

        // Disabled: automatic fallback to TokenOutputFormat when no path
        // is connected or set.
        /*
        if(!pathIsConnected && 
            (pathToken == null || ((StringToken)pathToken).stringValue().isEmpty())) {
        formatType.setToken("TokenOutputFormat");
        }
        */

    }

    /** Run the parent class's wrapup() and then discard any tokens
     *  still stored in the token map under this actor's full name.
     *
     *  @exception IllegalActionException If thrown by the parent class.
     */
    @Override
    public void wrapup() throws IllegalActionException {

        super.wrapup();

        final String actorKey = getFullName();
        _tokenMap.remove(actorKey);
    }

    /** The data to be written. This is an input port; its type
     *  constraints are set in _updateKeyValueTypes() and
     *  _setTypesFromConfiguration().
     */
    public TypedIOPort in;

    /** After data has been written, this port outputs the result.
     *  This is an output multiport. In fire(), it broadcasts an array of
     *  the stored tokens for TokenOutputFormat, the string "done" for
     *  NullOutputFormat, and the token of the path parameter otherwise.
     */
    public TypedIOPort out;

    /** If true, merge multiple output files into a single file.
     *  This is a boolean parameter whose default value is true. It is
     *  only read in fire() when the format is neither TokenOutputFormat
     *  nor NullOutputFormat.
     */
    public Parameter mergeMultiPartOutputs;

    ///////////////////////////////////////////////////////////////////
    ////                         protected methods                 ////

    /** Refresh the port type constraints after the key and value types
     *  have been updated by the parent class. The <i>in</i> port is
     *  constrained to be at most the key-value array type built from the
     *  current key and value types, and the <i>out</i> port is set to
     *  string.
     */
    @Override
    protected void _updateKeyValueTypes() {
        super._updateKeyValueTypes();

        // input: replace any earlier constraints with the new upper bound
        final Type keyValueArrayType = Types.createKeyValueArrayType(_keyType, _valueType);
        in.typeConstraints().clear();
        in.setTypeAtMost(keyValueArrayType);

        // output: always a string
        out.typeConstraints().clear();
        out.setTypeEquals(BaseType.STRING);
    }

    /** Set the port types according to the given format configuration
     *  property.
     *  <ul>
     *  <li>If no property is given, the types are taken from the
     *  <i>keyValueTypes</i> parameter, which must not be empty.</li>
     *  <li>For TokenOutputFormat, <i>in</i> must be at least an unsized
     *  array and <i>out</i> must be at least the type of <i>in</i>.</li>
     *  <li>For NullOutputFormat, <i>in</i> must be at least an unsized
     *  array and <i>out</i> is a string.</li>
     *  <li>Otherwise the parent class sets the types.</li>
     *  </ul>
     *
     *  @param formatProperty The configuration property of the selected
     *   format, or null if none was found for it.
     *  @exception IllegalActionException If no property is given and
     *   <i>keyValueTypes</i> is empty, or if setting the types fails.
     */
    @Override
    protected void _setTypesFromConfiguration(ConfigurationProperty formatProperty) throws IllegalActionException {

        // no configuration entry exists for the format: fall back to the
        // types given directly in keyValueTypes
        if (formatProperty == null) {
            final String typesStr = keyValueTypes.stringValue();
            if (typesStr.isEmpty()) {
                throw new IllegalActionException(this,
                        "Parameter keyValueTypes has to be set if third party class\n"
                                + "is set for parameter formatType.");
            }
            in.typeConstraints().clear();
            in.setTypeAtMost(Types.getKeyValueType(keyValueTypes, typesStr));
            out.typeConstraints().clear();
            out.setTypeEquals(BaseType.STRING);
            return;
        }

        final String formatName = formatProperty.getProperty("Name").getValue();

        if (formatName.equals("TokenOutputFormat")) {
            in.setTypeEquals(BaseType.UNKNOWN);
            in.typeConstraints().clear();
            // FIXME want to set to at least unsized array of records with key and value
            in.setTypeAtLeast(ArrayType.ARRAY_UNSIZED_BOTTOM);
            // the output carries the tokens, so it follows the input type
            out.setTypeEquals(BaseType.UNKNOWN);
            out.typeConstraints().clear();
            out.setTypeAtLeast(in);
        } else if (formatName.equals("NullOutputFormat")) {
            in.setTypeEquals(BaseType.UNKNOWN);
            in.typeConstraints().clear();
            // FIXME want to set to at least unsized array of records with key and value
            in.setTypeAtLeast(ArrayType.ARRAY_UNSIZED_BOTTOM);
            out.typeConstraints().clear();
            out.setTypeEquals(BaseType.STRING);
        } else {
            super._setTypesFromConfiguration(formatProperty);
        }
    }

    ///////////////////////////////////////////////////////////////////
    ////                         private methods                 ////

    /** Merge a directory containing multiple output files into a single file.
     *  If the path is not a directory, nothing is done. Otherwise the
     *  files are merged into a temporary file next to the directory
     *  (the path plus the suffix "-TMP1234"), the directory is deleted
     *  as part of the merge, and the temporary file is then renamed to
     *  the original path.
     *
     *  TODO move to parent class?
     *
     *  @param pathStr The path of the output file or directory.
     *  @exception IllegalActionException If the path cannot be
     *   accessed, the merge fails, or the merged file cannot be renamed
     *   to the original path.
     */
    private void _mergeMultiPartOutputs(String pathStr) throws IllegalActionException {

        Configuration configuration = new Configuration();

        Path srcPath = new Path(pathStr);

        try {
            FileSystem srcPathFileSystem = srcPath.getFileSystem(configuration);
            // only merge if the output is a directory.
            if (srcPathFileSystem.isDirectory(srcPath)) {

                // NOTE(review): the temporary name is fixed; a leftover file
                // with this name from an earlier failed run would be reused
                // as the merge target - confirm this is acceptable.
                Path destPath = new Path(pathStr + "-TMP1234");

                try {
                    // TODO if there is only one part-r-nnnnnn file, copyMerge() will still
                    // copy it instead of simply renaming it. 
                    // The 'true' argument asks copyMerge() to delete the
                    // source directory once the merge has succeeded.
                    if (!FileUtil.copyMerge(srcPathFileSystem, srcPath,
                            destPath.getFileSystem(configuration), destPath, true, configuration, "")) {
                        throw new IllegalActionException(this, "Unable to merge output files in " + srcPath + "/.");
                    }
                } catch (IOException e) {
                    throw new IllegalActionException(this, e,
                            "Error merging multi-part output files in " + srcPath + "/.");
                }

                try {
                    if (!destPath.getFileSystem(configuration).rename(destPath, srcPath)) {
                        throw new IllegalActionException(this, "Unable to rename " + destPath + " to " + srcPath);
                    }
                } catch (IOException e) {
                    // The source directory is already gone at this point, so
                    // a failed rename must be reported: otherwise the actor
                    // would output a path that does not exist.
                    throw new IllegalActionException(this, e,
                            "Error renaming " + destPath + " to " + srcPath);
                }
            }
        } catch (IOException e) {
            throw new IllegalActionException(this, e, "Error accessing output file " + srcPath);
        }
    }

    /** A mapping of DDPDataSink actor name to tokens. The map is static,
     *  so it is shared by all DDPDataSink instances. fire() and wrapup()
     *  use the actor's full name as the key; callers of addTokens()
     *  supply the key themselves. Entries are removed in fire() when the
     *  format is TokenOutputFormat, and in wrapup(). The map itself is
     *  a synchronized map; the lists it contains are plain ArrayLists.
     */
    private static final java.util.Map<String, List<Token>> _tokenMap = Collections
            .synchronizedMap(new HashMap<String, List<Token>>());

}