org.pentaho.di.trans.steps.solrinput.SolrInput.java Source code

Java tutorial

Introduction

Here is the source code for org.pentaho.di.trans.steps.solrinput.SolrInput.java

Source

/*! ******************************************************************************
*
* Pentaho Data Integration
*
* Copyright (C) 2002-2013 by Pentaho : http://www.pentaho.com
*
*******************************************************************************
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
*    http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
******************************************************************************/

package org.pentaho.di.trans.steps.solrinput;

import java.util.ArrayList;
import java.util.List;

import org.pentaho.di.core.Const;
import org.pentaho.di.core.exception.KettleException;
import org.pentaho.di.core.row.RowDataUtil;
import org.pentaho.di.core.row.RowMeta;
import org.pentaho.di.core.row.RowMetaInterface;
import org.pentaho.di.core.row.ValueMetaInterface;
import org.pentaho.di.i18n.BaseMessages;
import org.pentaho.di.trans.Trans;
import org.pentaho.di.trans.TransMeta;
import org.pentaho.di.trans.step.BaseStep;
import org.pentaho.di.trans.step.StepDataInterface;
import org.pentaho.di.trans.step.StepInterface;
import org.pentaho.di.trans.step.StepMeta;
import org.pentaho.di.trans.step.StepMetaInterface;
import org.pentaho.di.trans.steps.solrinput.SolrInputData;
import org.pentaho.di.trans.steps.solrinput.SolrInputMeta;

import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.impl.HttpSolrServer;
import org.apache.solr.client.solrj.response.FacetField;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.params.CursorMarkParams;

/**
 * Solr Input step for PDI.
 *
 * The step sends a query to a Solr server and writes the result to the row
 * stream: one output row per returned document or, when a facet field is
 * configured, one output row per facet count of that field. The complete
 * result is read from Solr during the first call to processRow() and is
 * then handed out one row per call.
 *
 * This class is the implementation of StepInterface.
 * Classes implementing this interface need to:
 *
 * - initialize the step
 * - execute the row processing logic
 * - dispose of the step
 *
 * Please do not create any local fields in a StepInterface class. Store any
 * information related to the processing logic in the supplied step data interface
 * instead.
 *
 */

public class SolrInput extends BaseStep implements StepInterface {

    private static Class<?> PKG = SolrInputMeta.class;
    private SolrInputMeta meta;
    private SolrInputData data;

    /**
     * The constructor should simply pass on its arguments to the parent class.
     * 
     * @param s             step description
     * @param stepDataInterface   step data class
     * @param c               step copy
     * @param t               transformation description
     * @param dis            transformation executing
     */
    public SolrInput(StepMeta s, StepDataInterface stepDataInterface, int c, TransMeta t, Trans dis) {
        super(s, stepDataInterface, c, t, dis);
    }

    /**
     * Once the transformation starts executing, the processRow() method is called repeatedly
     * by PDI for as long as it returns true. To indicate that a step has finished processing rows
     * this method must call setOutputDone() and return false;
     * 
     * Steps which process incoming rows typically call getRow() to read a single row from the
     * input stream, change or add row content, call putRow() to pass the changed row on 
     * and return true. If getRow() returns null, no more rows are expected to come in, 
     * and the processRow() implementation calls setOutputDone() and returns false to
     * indicate that it is done too.
     * 
     * Steps which generate rows typically construct a new row Object[] using a call to
     * RowDataUtil.allocateRowData(numberOfFields), add row content, and call putRow() to
     * pass the new row on. Above process may happen in a loop to generate multiple rows,
     * at the end of which processRow() would call setOutputDone() and return false;
     * 
     * @param smi the step meta interface containing the step settings
     * @param sdi the step data interface that should be used to store
     * 
     * @return true to indicate that the function should be called again, false if the step is done
     */
    public boolean processRow(StepMetaInterface smi, StepDataInterface sdi) throws KettleException {

        if (first) {
            first = false;
            // Create the output row meta-data
            data.outputRowMeta = new RowMeta();
            meta.getFields(data.outputRowMeta, getStepname(), null, null, this, repository, metaStore);
            // For String to <type> conversions, we allocate a conversion meta data row as well...
            data.convertRowMeta = data.outputRowMeta.cloneToType(ValueMetaInterface.TYPE_STRING);

            // get real values
            boolean tryCursor = true;
            Integer startRecord = 0;
            Integer chunkRowSize = 20;
            String realURL = meta.getURL();
            String realQ = meta.getQ();
            String realSort = meta.getSort();
            String realCursor = meta.getCursor();
            String realFq = meta.getFq();
            String realFl = meta.getFl();
            String realFacetQuery = meta.getFacetQuery();
            String realFacetField = meta.getFacetField();
            /* Send and Get the report */
            SolrQuery query = new SolrQuery();
            query.set("rows", chunkRowSize);
            if (realQ != null && !realQ.equals("")) {
                query.set("q", realQ);
            }
            if (realSort != null && !realSort.equals("")) {
                query.set("sort", realSort);
            } else {
                tryCursor = false;
            }
            if (realCursor != null && !realCursor.equals("")) {
                if (realCursor.equals("No")) {
                    tryCursor = false;
                }
            }
            if (realFl != null && !realFl.equals("")) {
                query.set("fl", realFl);
            }
            if (realFq != null && !realFq.equals("")) {
                query.set("fq", realFq);
            }
            if (realFacetField != null && !realFacetField.equals("")) {
                //TODO incorporate multiple facet fields at once
                //String[] facetFields = realFacetField.split("\\s*,\\s*");
                //for(int i =0; i < facetFields.length; i++){
                query.addFacetField(realFacetField);
                //}
                query.setFacet(true);
                query.setFacetLimit(-1);
                query.setFacetMinCount(0);
                query.setFacetSort("count");
                query.set("rows", 0);
                tryCursor = false;
                data.facetRequested = true;
            }
            if (realFacetQuery != null && !realFacetQuery.equals("")) {
                query.addFacetQuery(realFacetQuery);
            }
            // You can't use "TimeAllowed" with "CursorMark"
            // The documentation says "Values <= 0 mean 
            // no time restriction", so setting to 0.
            query.setTimeAllowed(0);
            HttpSolrServer solr = new HttpSolrServer(realURL);
            String cursorMark = CursorMarkParams.CURSOR_MARK_START;
            boolean done = false;
            QueryResponse rsp = null;
            while (!done) {
                if (tryCursor) {
                    query.set(CursorMarkParams.CURSOR_MARK_PARAM, cursorMark);
                } else {
                    query.setStart(startRecord);
                }
                try {
                    rsp = solr.query(query);
                } catch (SolrServerException e) {
                    e.printStackTrace();
                }
                if (data.facetRequested) {
                    data.facetCountName = rsp.getFacetFields().get(0).getName();
                    data.facetCounts = rsp.getFacetFields().get(0).getValues();
                    done = true;
                } else {
                    SolrDocumentList theseDocs = rsp.getResults();
                    for (SolrDocument doc : theseDocs) {
                        data.documentList.add(doc);
                    }
                }
                if (tryCursor) {
                    String nextCursorMark = rsp.getNextCursorMark();
                    if (cursorMark.equals(nextCursorMark)) {
                        done = true;
                    } else {
                        cursorMark = nextCursorMark;
                    }
                } else {
                    startRecord = startRecord + chunkRowSize;
                    if (startRecord >= rsp.getResults().getNumFound()) {
                        done = true;
                    }
                }
            }
        }

        Object[] outputRowData = null;

        try {
            if (data.facetRequested) {
                // get one row if we can
                if (data.facetCounts.size() - 1 < data.recordIndex) {
                    setOutputDone();
                    return false;
                }
                FacetField.Count facetRecord = data.facetCounts.get(data.recordIndex);
                outputRowData = prepareFacetRecord(facetRecord);
            } else {
                // get one row if we can
                if (data.documentList.size() - 1 < data.recordIndex) {
                    setOutputDone();
                    return false;
                }
                SolrDocument record = data.documentList.get(data.recordIndex);
                outputRowData = prepareRecord(record);
            }
            putRow(data.outputRowMeta, outputRowData); // copy row to output rowset(s);
            data.recordIndex++;
            return true;
        } catch (KettleException e) {
            boolean sendToErrorRow = false;
            String errorMessage = null;
            if (getStepMeta().isDoingErrorHandling()) {
                sendToErrorRow = true;
                errorMessage = e.toString();
            } else {
                logError(BaseMessages.getString(PKG, "SolrInput.log.Exception", e.getMessage()));
                logError(Const.getStackTracker(e));
                setErrors(1);
                stopAll();
                setOutputDone(); // signal end to receiver(s)
                return false;
            }
            if (sendToErrorRow) {
                // Simply add this row to the error row
                putError(getInputRowMeta(), outputRowData, 1, errorMessage, null, "SolrInput001");
            }
        }
        return true;
    }

    private Object[] prepareRecord(SolrDocument record) throws KettleException {
        // Build an empty row based on the meta-data
        Object[] outputRowData = buildEmptyRow();
        java.util.Collection<String> thisNamesArray = record.getFieldNames();
        List<String> a = new ArrayList<String>(thisNamesArray);
        try {
            for (int i = 0; i < data.nrfields; i++) {
                String value = "";
                if (a.contains(meta.getInputFields()[i].getName())) {
                    value = record.getFieldValue(meta.getInputFields()[i].getName()).toString();
                }
                switch (meta.getInputFields()[i].getTrimType()) {
                case SolrInputField.TYPE_TRIM_LEFT:
                    value = Const.ltrim(value);
                    break;
                case SolrInputField.TYPE_TRIM_RIGHT:
                    value = Const.rtrim(value);
                    break;
                case SolrInputField.TYPE_TRIM_BOTH:
                    value = Const.trim(value);
                    break;
                default:
                    break;
                }
                // do conversions
                ValueMetaInterface targetValueMeta = data.outputRowMeta.getValueMeta(i);
                ValueMetaInterface sourceValueMeta = data.convertRowMeta.getValueMeta(i);
                outputRowData[i] = targetValueMeta.convertData(sourceValueMeta, value);
            } // End of loop over fields...
            RowMetaInterface irow = getInputRowMeta();
            data.previousRow = irow == null ? outputRowData : irow.cloneRow(outputRowData); // copy it to make
        } catch (Exception e) {
            throw new KettleException(BaseMessages.getString(PKG, "SolrInput.Exception.CanNotParseFromSolr"), e);
        }
        return outputRowData;
    }

    private Object[] prepareFacetRecord(FacetField.Count facetRecord) throws KettleException {
        // Build an empty row based on the meta-data
        Object[] outputRowData = buildEmptyRow();
        try {
            for (int i = 0; i < data.nrfields; i++) {
                String value = "";
                if (meta.getInputFields()[i].getName().equals(data.facetCountName)) {
                    value = facetRecord.getName();
                }
                if (meta.getInputFields()[i].getName().equals("count")) {
                    value = Long.toString(facetRecord.getCount());
                }
                switch (meta.getInputFields()[i].getTrimType()) {
                case SolrInputField.TYPE_TRIM_LEFT:
                    value = Const.ltrim(value);
                    break;
                case SolrInputField.TYPE_TRIM_RIGHT:
                    value = Const.rtrim(value);
                    break;
                case SolrInputField.TYPE_TRIM_BOTH:
                    value = Const.trim(value);
                    break;
                default:
                    break;
                }
                // do conversions
                ValueMetaInterface targetValueMeta = data.outputRowMeta.getValueMeta(i);
                ValueMetaInterface sourceValueMeta = data.convertRowMeta.getValueMeta(i);
                outputRowData[i] = targetValueMeta.convertData(sourceValueMeta, value);
            } // End of loop over fields...
            RowMetaInterface irow = getInputRowMeta();
            data.previousRow = irow == null ? outputRowData : irow.cloneRow(outputRowData); // copy it to make
        } catch (Exception e) {
            throw new KettleException(BaseMessages.getString(PKG, "SolrInput.Exception.CanNotParseFromSolr"), e);
        }
        return outputRowData;
    }

    /**
     * Build an empty row based on the meta-data.
     *
     * @return
     */
    private Object[] buildEmptyRow() {
        Object[] rowData = RowDataUtil.allocateRowData(data.outputRowMeta.size());
        return rowData;
    }

    /**
     * This method is called by PDI during transformation startup. 
     * 
     * It should initialize required for step execution. 
     * 
     * The meta and data implementations passed in can safely be cast
     * to the step's respective implementations. 
     * 
     * It is mandatory that super.init() is called to ensure correct behavior.
     * 
     * Typical tasks executed here are establishing the connection to a database,
     * as wall as obtaining resources, like file handles.
     * 
     * @param smi    step meta interface implementation, containing the step settings
     * @param sdi   step data interface implementation, used to store runtime information
     * 
     * @return true if initialization completed successfully, false if there was an error preventing the step from working. 
     *  
     */
    public boolean init(StepMetaInterface smi, StepDataInterface sdi) {

        meta = (SolrInputMeta) smi;
        data = (SolrInputData) sdi;

        if (super.init(smi, sdi)) {
            // get total fields in the grid
            data.nrfields = meta.getInputFields().length;
            // Check if field list is filled
            if (data.nrfields == 0) {
                log.logError(BaseMessages.getString(PKG, "SolrInputDialog.FieldsMissing.DialogMessage"));
                return false;
            }
            // check url
            String realURL = environmentSubstitute(meta.getURL());
            if (Const.isEmpty(realURL)) {
                log.logError(BaseMessages.getString(PKG, "SolrInput.UsernameMissing.Error"));
                return false;
            }
            try {
                data.solr = new HttpSolrServer(realURL);
                return true;
            } catch (Exception e) {
                log.logError(BaseMessages.getString(PKG, "SolrInput.Log.ErrorOccurredDuringStepInitialize"), e);
            }
            return true;
        }
        return false;
    }

    /**
     * This method is called by PDI once the step is done processing. 
     * 
     * The dispose() method is the counterpart to init() and should release any resources
     * acquired for step execution like file handles or database connections.
     * 
     * The meta and data implementations passed in can safely be cast
     * to the step's respective implementations. 
     * 
     * It is mandatory that super.dispose() is called to ensure correct behavior.
     * 
     * @param smi    step meta interface implementation, containing the step settings
     * @param sdi   step data interface implementation, used to store runtime information
     */
    public void dispose(StepMetaInterface smi, StepDataInterface sdi) {

        // Casting to step-specific implementation classes is safe
        meta = (SolrInputMeta) smi;
        data = (SolrInputData) sdi;

        try {
            if (data.outputRowMeta != null) {
                data.outputRowMeta = null;
            }
            if (data.convertRowMeta != null) {
                data.convertRowMeta = null;
            }
        } catch (Exception e) { /* Ignore */
        }
        super.dispose(smi, sdi);
    }

}