// Source file: com.panet.imeta.trans.steps.pgbulkloader.PGBulkLoader.java

/* Copyright (c) 2007 Pentaho Corporation.  All rights reserved. 
* This software was developed by Pentaho Corporation and is provided under the terms 
* of the GNU Lesser General Public License, Version 2.1. You may not use 
* this file except in compliance with the license. If you need a copy of the license, 
* please go to http://www.gnu.org/licenses/lgpl-2.1.txt. The Original Code is Pentaho 
* Data Integration.  The Initial Developer is Pentaho Corporation.
*
* Software distributed under the GNU Lesser Public License is distributed on an "AS IS" 
* basis, WITHOUT WARRANTY OF ANY KIND, either express or  implied. Please refer to 
* the license for the specific language governing your rights and limitations.*/

package com.panet.imeta.trans.steps.pgbulkloader;

//
// The "designer" notes of the PostgreSQL bulkloader:
// ----------------------------------------------
//
// Let's see how fast we can push data down the tube with the use of COPY FROM STDIN 
//
// 

import java.io.IOException;
import java.math.BigDecimal;

import org.apache.commons.vfs.FileObject;

import com.panet.imeta.core.Const;
import com.panet.imeta.core.database.DatabaseMeta;
import com.panet.imeta.core.exception.KettleException;
import com.panet.imeta.core.row.RowMetaInterface;
import com.panet.imeta.core.row.ValueMetaInterface;
import com.panet.imeta.core.util.StreamLogger;
import com.panet.imeta.core.vfs.KettleVFS;
import com.panet.imeta.trans.Trans;
import com.panet.imeta.trans.TransMeta;
import com.panet.imeta.trans.step.BaseStep;
import com.panet.imeta.trans.step.StepDataInterface;
import com.panet.imeta.trans.step.StepInterface;
import com.panet.imeta.trans.step.StepMeta;
import com.panet.imeta.trans.step.StepMetaInterface;

/**
 * Performs a bulk load to a postgres table.
 *
 * Based on (copied from) Sven Boden's Oracle Bulk Loader step
 * 
 * @author matt
 * @since  28-mar-2008
 */
public class PGBulkLoader extends BaseStep implements StepInterface {
    private PGBulkLoaderMeta meta;
    private PGBulkLoaderData data;

    public PGBulkLoader(StepMeta stepMeta, StepDataInterface stepDataInterface, int copyNr, TransMeta transMeta,
            Trans trans) {
        super(stepMeta, stepDataInterface, copyNr, transMeta, trans);
    }

    /**
     * Get the contents of the control file as specified in the meta object
     * 
     * @param meta the meta object to model the control file after
     * 
     * @return a string containing the control file contents
     */
    public String getCopyCommand(RowMetaInterface rm, Object[] r) throws KettleException {
        DatabaseMeta dm = meta.getDatabaseMeta();

        String loadAction = meta.getLoadAction();

        StringBuffer contents = new StringBuffer(500);

        String tableName = dm.getQuotedSchemaTableCombination(environmentSubstitute(meta.getSchemaName()),
                environmentSubstitute(meta.getTableName()));

        // Set the date style...
        //
        // contents.append("SET DATESTYLE ISO;"); // This is the default but we set it anyway...
        // contents.append(Const.CR);

        // Create a Postgres / Greenplum COPY string for use with a psql client
        if (loadAction.equalsIgnoreCase("truncate")) {
            contents.append("TRUNCATE TABLE ");
            contents.append(tableName + ";");
            contents.append(Const.CR);
        }
        contents.append("COPY ");
        // Table name

        contents.append(tableName);

        // Names of columns

        contents.append(" ( ");

        String streamFields[] = meta.getFieldStream();
        String tableFields[] = meta.getFieldTable();

        if (streamFields == null || streamFields.length == 0) {
            throw new KettleException("No fields defined to load to database");
        }

        for (int i = 0; i < streamFields.length; i++) {
            if (i != 0)
                contents.append(", ");
            contents.append(dm.quoteField(tableFields[i]));
        }

        contents.append(" ) ");

        // The "FROM" filename
        contents.append(" FROM STDIN"); // FIFO file

        // The "FORMAT" clause
        contents.append(" WITH CSV DELIMITER AS '").append(meta.getDelimiter()).append("' QUOTE AS '")
                .append(meta.getEnclosure()).append("'");
        contents.append(";").append(Const.CR);

        return contents.toString();
    }

    /**
     * Create the command line for a psql process depending on the meta
     * information supplied.
     * 
     * @param meta The meta data to create the command line from
     * @param password Use the real password or not
     * 
     * @return The string to execute.
     * 
     * @throws KettleException Upon any exception
     */
    public String createCommandLine(PGBulkLoaderMeta meta, boolean password) throws KettleException {
        StringBuffer sb = new StringBuffer(300);

        if (meta.getPsqlpath() != null) {
            try {
                FileObject fileObject = KettleVFS.getFileObject(environmentSubstitute(meta.getPsqlpath()));
                String psqlexec = KettleVFS.getFilename(fileObject);
                sb.append(psqlexec);
            } catch (IOException ex) {
                throw new KettleException("Error retrieving sqlldr string", ex);
            }
        } else {
            throw new KettleException("No psql application specified");
        }

        DatabaseMeta dm = meta.getDatabaseMeta();
        if (dm != null) {

            // Note: Passwords are not supported directly, try configuring your connection for trusted access using pg_hba.conf
            //

            // The username
            // 
            // String user = Const.NVL(dm.getUsername(), "");
            // sb.append(" -U ").append(environmentSubstitute(user));

            // Hostname and portname
            //
            String hostname = Const.NVL(dm.getHostname(), "");
            String portnum = Const.NVL(dm.getDatabasePortNumberString(), "");
            sb.append(" -h ");
            sb.append(hostname);
            sb.append(" -p ");
            sb.append(portnum);

            // Database Name
            // 
            String dns = Const.NVL(dm.getDatabaseName(), "");
            sb.append(" -d ");
            String overrideName = meta.getDbNameOverride();
            if (Const.isEmpty(Const.rtrim(overrideName))) {
                sb.append(environmentSubstitute(dns));
            } else {
                // if the database name override is filled in, do that one.
                sb.append(environmentSubstitute(overrideName));
            }
        } else {
            throw new KettleException("No connection specified");
        }

        return sb.toString();
    }

    public boolean execute(PGBulkLoaderMeta meta, boolean wait) throws KettleException {
        Runtime rt = Runtime.getRuntime();

        try {
            String cmd = createCommandLine(meta, true);
            logBasic("Executing command: " + cmd);
            data.psqlProcess = rt.exec(cmd);

            // any error message?
            //
            data.errorLogger = new StreamLogger(data.psqlProcess.getErrorStream(), "ERROR");

            // any output?
            data.outputLogger = new StreamLogger(data.psqlProcess.getInputStream(), "OUTPUT");

            // Where do we send the data to? --> To STDIN of the psql process
            //
            data.pgOutputStream = data.psqlProcess.getOutputStream();

            // kick them off
            new Thread(data.errorLogger).start();
            new Thread(data.outputLogger).start();

            // OK, from here on, we need to feed in the COPY command followed by the data into the pgOutputStream
            //
        } catch (Exception ex) {
            throw new KettleException("Error while executing psql : " + createCommandLine(meta, false), ex);
        }

        return true;
    }

    public boolean processRow(StepMetaInterface smi, StepDataInterface sdi) throws KettleException {
        meta = (PGBulkLoaderMeta) smi;
        data = (PGBulkLoaderData) sdi;

        try {
            Object[] r = getRow(); // Get row from input rowset & set row busy!

            if (r == null) // no more input to be expected...
            {
                setOutputDone();

                // Close the output stream...
                //
                data.pgOutputStream.flush();
                data.pgOutputStream.close();

                // wait for the pgsql process to finish and check for any error...
                //
                int exitVal = data.psqlProcess.waitFor();
                logBasic(Messages.getString("GPBulkLoader.Log.ExitValuePsqlPath", "" + exitVal)); //$NON-NLS-1$

                return false;
            }

            if (first) {
                first = false;

                // Cache field indexes.
                //
                data.keynrs = new int[meta.getFieldStream().length];
                for (int i = 0; i < data.keynrs.length; i++) {
                    data.keynrs[i] = getInputRowMeta().indexOfValue(meta.getFieldStream()[i]);
                }

                // execute the psql statement...
                //
                execute(meta, true);

                String copyCmd = getCopyCommand(getInputRowMeta(), r);
                logBasic("Launching command: " + copyCmd);
                data.pgOutputStream.write(copyCmd.getBytes());

                // Write rows of data hereafter...
                //
            }

            writeRowToPostgres(getInputRowMeta(), r);

            putRow(getInputRowMeta(), r);
            incrementLinesOutput();

            return true;
        } catch (Exception e) {
            logError(Messages.getString("GPBulkLoader.Log.ErrorInStep"), e); //$NON-NLS-1$
            setErrors(1);
            stopAll();
            setOutputDone(); // signal end to receiver(s)
            return false;
        }
    }

    private void writeRowToPostgres(RowMetaInterface rowMeta, Object[] r) throws KettleException {

        try {
            // So, we have this output stream to which we can write CSV data to.
            // Basically, what we need to do is write the binary data (from strings to it as part of this proof of concept)
            //
            // Let's assume the data is in the correct format here too.
            //
            for (int i = 0; i < data.keynrs.length; i++) {
                if (i > 0) {
                    // Write a separator 
                    //
                    data.pgOutputStream.write(data.separator);
                }

                int index = data.keynrs[i];
                ValueMetaInterface valueMeta = rowMeta.getValueMeta(index);
                Object valueData = r[index];

                if (valueData != null) {
                    switch (valueMeta.getType()) {
                    case ValueMetaInterface.TYPE_STRING:
                        data.pgOutputStream.write(data.quote);
                        if (valueMeta.isStorageBinaryString()) {
                            // lazy conversion.  For this test, just dump the data to the output stream.
                            //

                            data.pgOutputStream.write((byte[]) valueData);
                        } else {
                            data.pgOutputStream.write(valueMeta.getString(valueData).getBytes());
                        }
                        data.pgOutputStream.write(data.quote);
                        break;
                    case ValueMetaInterface.TYPE_INTEGER:
                        if (valueMeta.isStorageBinaryString()) {
                            data.pgOutputStream.write((byte[]) valueData);
                        } else {
                            data.pgOutputStream.write(Long.toString(valueMeta.getInteger(valueData)).getBytes());
                        }
                        break;
                    case ValueMetaInterface.TYPE_DATE:
                        // Format the date in the right format.
                        // 
                        switch (data.dateFormatChoices[i]) {
                        // Pass the data along in the format chosen by the user OR in binary format...
                        //
                        case PGBulkLoaderMeta.NR_DATE_MASK_PASS_THROUGH: {
                            if (valueMeta.isStorageBinaryString()) {
                                data.pgOutputStream.write((byte[]) valueData);
                            } else {
                                String dateString = valueMeta.getString(valueData);
                                if (dateString != null) {
                                    data.pgOutputStream.write(dateString.getBytes());
                                }
                            }
                        }
                            break;
                        // Convert to a "YYYY/MM/DD" format
                        //
                        case PGBulkLoaderMeta.NR_DATE_MASK_DATE: {
                            String dateString = data.dateMeta.getString(valueMeta.getDate(valueData));
                            if (dateString != null) {
                                data.pgOutputStream.write(dateString.getBytes());
                            }
                        }
                            break;
                        // Convert to a "YYYY/MM/DD HH:MM:SS" (ISO) format
                        //
                        case PGBulkLoaderMeta.NR_DATE_MASK_DATETIME: {
                            String dateTimeString = data.dateTimeMeta.getString(valueMeta.getDate(valueData));
                            if (dateTimeString != null) {
                                data.pgOutputStream.write(dateTimeString.getBytes());
                            }
                        }
                            break;
                        }
                        break;
                    case ValueMetaInterface.TYPE_BOOLEAN:
                        if (valueMeta.isStorageBinaryString()) {
                            data.pgOutputStream.write((byte[]) valueData);
                        } else {
                            data.pgOutputStream.write(Double.toString(valueMeta.getNumber(valueData)).getBytes());
                        }
                        break;
                    case ValueMetaInterface.TYPE_NUMBER:
                        if (valueMeta.isStorageBinaryString()) {
                            data.pgOutputStream.write((byte[]) valueData);
                        } else {
                            data.pgOutputStream.write(Double.toString(valueMeta.getNumber(valueData)).getBytes());
                        }
                        break;
                    case ValueMetaInterface.TYPE_BIGNUMBER:
                        if (valueMeta.isStorageBinaryString()) {
                            data.pgOutputStream.write((byte[]) valueData);
                        } else {
                            BigDecimal big = valueMeta.getBigNumber(valueData);
                            if (big != null) {
                                data.pgOutputStream.write(big.toString().getBytes());
                            }
                        }
                        break;
                    }
                }
            }

            // Now write a newline
            //
            data.pgOutputStream.write(data.newline);
        } catch (Exception e) {
            throw new KettleException("Error serializing rows of data to the psql command", e);
        }

    }

    public boolean init(StepMetaInterface smi, StepDataInterface sdi) {
        meta = (PGBulkLoaderMeta) smi;
        data = (PGBulkLoaderData) sdi;

        if (super.init(smi, sdi)) {
            if (meta.getEnclosure() != null)
                data.quote = meta.getEnclosure().getBytes();
            else
                data.quote = new byte[] {};
            if (meta.getDelimiter() != null)
                data.separator = meta.getDelimiter().getBytes();
            else
                data.separator = new byte[] {};
            data.newline = Const.CR.getBytes();

            data.dateFormatChoices = new int[meta.getFieldStream().length];
            for (int i = 0; i < data.dateFormatChoices.length; i++) {
                if (Const.isEmpty(meta.getDateMask()[i])) {
                    data.dateFormatChoices[i] = PGBulkLoaderMeta.NR_DATE_MASK_PASS_THROUGH;
                } else if (meta.getDateMask()[i].equalsIgnoreCase(PGBulkLoaderMeta.DATE_MASK_DATE)) {
                    data.dateFormatChoices[i] = PGBulkLoaderMeta.NR_DATE_MASK_DATE;
                } else if (meta.getDateMask()[i].equalsIgnoreCase(PGBulkLoaderMeta.DATE_MASK_DATETIME)) {
                    data.dateFormatChoices[i] = PGBulkLoaderMeta.NR_DATE_MASK_DATETIME;
                } else { // The default : just pass it along...
                    data.dateFormatChoices[i] = PGBulkLoaderMeta.NR_DATE_MASK_PASS_THROUGH;
                }

            }
            return true;
        }
        return false;
    }

    //
    // Run is were the action happens!
    public void run() {
        try {
            logBasic(Messages.getString("System.Log.StartingToRun")); //$NON-NLS-1$

            while (processRow(meta, data) && !isStopped())
                ;
        } catch (Throwable t) {
            logError(Messages.getString("System.Log.UnexpectedError") + " : "); //$NON-NLS-1$ //$NON-NLS-2$
            logError(Const.getStackTracker(t));
            setErrors(1);
            stopAll();
        } finally {
            dispose(meta, data);
            logSummary();
            markStop();
        }
    }
}