org.pentaho.di.trans.steps.samplerows.SampleRows.java Source code

Java tutorial

Introduction

Here is the source code for org.pentaho.di.trans.steps.samplerows.SampleRows.java

Source

/*! ******************************************************************************
 *
 * Pentaho Data Integration
 *
 * Copyright (C) 2002-2016 by Pentaho : http://www.pentaho.com
 *
 *******************************************************************************
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 ******************************************************************************/

package org.pentaho.di.trans.steps.samplerows;

import org.pentaho.di.core.exception.KettleException;
import org.pentaho.di.core.row.RowDataUtil;
import org.pentaho.di.core.util.Utils;
import org.pentaho.di.i18n.BaseMessages;
import org.pentaho.di.trans.Trans;
import org.pentaho.di.trans.TransMeta;
import org.pentaho.di.trans.step.BaseStep;
import org.pentaho.di.trans.step.StepDataInterface;
import org.pentaho.di.trans.step.StepInterface;
import org.pentaho.di.trans.step.StepMeta;
import org.pentaho.di.trans.step.StepMetaInterface;

import com.google.common.collect.ImmutableRangeSet;
import com.google.common.collect.Range;
import com.google.common.collect.TreeRangeSet;

/**
 * Sample rows. Filter rows based on line number
 *
 * @author Samatar
 * @since 2-jun-2003
 */

public class SampleRows extends BaseStep implements StepInterface {
    private static Class<?> PKG = SampleRowsMeta.class; // for i18n purposes, needed by Translator2!!

    private SampleRowsMeta meta;
    private SampleRowsData data;

    public SampleRows(StepMeta stepMeta, StepDataInterface stepDataInterface, int copyNr, TransMeta transMeta,
            Trans trans) {
        super(stepMeta, stepDataInterface, copyNr, transMeta, trans);
    }

    public boolean processRow(StepMetaInterface smi, StepDataInterface sdi) throws KettleException {
        meta = (SampleRowsMeta) smi;
        data = (SampleRowsData) sdi;

        Object[] r = getRow(); // get row, set busy!
        if (r == null) { // no more input to be expected...

            setOutputDone();
            return false;
        }
        if (first) {
            first = false;

            String realRange = environmentSubstitute(meta.getLinesRange());
            data.addlineField = (!Utils.isEmpty(environmentSubstitute(meta.getLineNumberField())));

            // get the RowMeta
            data.previousRowMeta = getInputRowMeta().clone();
            data.NrPrevFields = data.previousRowMeta.size();
            data.outputRowMeta = data.previousRowMeta;
            if (data.addlineField) {
                meta.getFields(data.outputRowMeta, getStepname(), null, null, this, repository, metaStore);
            }

            String[] rangePart = realRange.split(",");
            ImmutableRangeSet.Builder<Integer> setBuilder = ImmutableRangeSet.builder();

            for (String part : rangePart) {
                if (part.matches("\\d+")) {
                    if (log.isDebug()) {
                        logDebug(BaseMessages.getString(PKG, "SampleRows.Log.RangeValue", part));
                    }
                    int vpart = Integer.valueOf(part);
                    setBuilder.add(Range.singleton(vpart));

                } else if (part.matches("\\d+\\.\\.\\d+")) {
                    String[] rangeMultiPart = part.split("\\.\\.");
                    Integer start = Integer.valueOf(rangeMultiPart[0]);
                    Integer end = Integer.valueOf(rangeMultiPart[1]);
                    Range<Integer> range = Range.closed(start, end);
                    if (log.isDebug()) {
                        logDebug(BaseMessages.getString(PKG, "SampleRows.Log.RangeValue", range));
                    }
                    setBuilder.add(range);
                }
            }
            data.rangeSet = setBuilder.build();
        } // end if first

        if (data.addlineField) {
            data.outputRow = RowDataUtil.allocateRowData(data.outputRowMeta.size());
            for (int i = 0; i < data.NrPrevFields; i++) {
                data.outputRow[i] = r[i];
            }
        } else {
            data.outputRow = r;
        }

        int linesRead = (int) getLinesRead();
        if (data.rangeSet.contains(linesRead)) {
            if (data.addlineField) {
                data.outputRow[data.NrPrevFields] = getLinesRead();
            }

            // copy row to possible alternate rowset(s).
            //
            putRow(data.outputRowMeta, data.outputRow);

            if (log.isRowLevel()) {
                logRowlevel(BaseMessages.getString(PKG, "SampleRows.Log.LineNumber",
                        linesRead + " : " + getInputRowMeta().getString(r)));
            }
        }

        // Check if maximum value has been exceeded
        if (data.rangeSet.isEmpty() || linesRead >= data.rangeSet.span().upperEndpoint()) {
            setOutputDone();
        }

        // Allowed to continue to read in data
        return true;
    }

    public boolean init(StepMetaInterface smi, StepDataInterface sdi) {
        meta = (SampleRowsMeta) smi;
        data = (SampleRowsData) sdi;

        if (super.init(smi, sdi)) {
            // Add init code here.
            return true;
        }
        return false;
    }

}