org.pentaho.di.job.entries.xmlwellformed.JobEntryXMLWellFormed.java Source code

Java tutorial

Introduction

Here is the source code for org.pentaho.di.job.entries.xmlwellformed.JobEntryXMLWellFormed.java

Source

/*! ******************************************************************************
 *
 * Pentaho Data Integration
 *
 * Copyright (C) 2002-2013 by Pentaho : http://www.pentaho.com
 *
 *******************************************************************************
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 ******************************************************************************/

package org.pentaho.di.job.entries.xmlwellformed;

import static org.pentaho.di.job.entry.validator.AbstractFileValidator.putVariableSpace;
import static org.pentaho.di.job.entry.validator.AndValidator.putValidators;
import static org.pentaho.di.job.entry.validator.JobEntryValidatorUtils.andValidator;
import static org.pentaho.di.job.entry.validator.JobEntryValidatorUtils.fileExistsValidator;
import static org.pentaho.di.job.entry.validator.JobEntryValidatorUtils.notNullValidator;

import java.io.IOException;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.vfs.AllFileSelector;
import org.apache.commons.vfs.FileObject;
import org.apache.commons.vfs.FileSelectInfo;
import org.apache.commons.vfs.FileType;
import org.pentaho.di.cluster.SlaveServer;
import org.pentaho.di.core.CheckResultInterface;
import org.pentaho.di.core.Const;
import org.pentaho.di.core.Result;
import org.pentaho.di.core.ResultFile;
import org.pentaho.di.core.RowMetaAndData;
import org.pentaho.di.core.database.DatabaseMeta;
import org.pentaho.di.core.exception.KettleDatabaseException;
import org.pentaho.di.core.exception.KettleException;
import org.pentaho.di.core.exception.KettleXMLException;
import org.pentaho.di.core.variables.VariableSpace;
import org.pentaho.di.core.vfs.KettleVFS;
import org.pentaho.di.core.xml.XMLCheck;
import org.pentaho.di.core.xml.XMLHandler;
import org.pentaho.di.i18n.BaseMessages;
import org.pentaho.di.job.Job;
import org.pentaho.di.job.JobMeta;
import org.pentaho.di.job.entry.JobEntryBase;
import org.pentaho.di.job.entry.JobEntryInterface;
import org.pentaho.di.job.entry.validator.ValidatorContext;
import org.pentaho.di.repository.ObjectId;
import org.pentaho.di.repository.Repository;
import org.pentaho.metastore.api.IMetaStore;
import org.w3c.dom.Node;
import org.xml.sax.helpers.DefaultHandler;

/**
 * Defines the 'XML well formed' job entry, which checks whether one or more XML files are well formed.
 *
 * @author Samatar Hassan
 * @since 26-03-2008
 */

public class JobEntryXMLWellFormed extends JobEntryBase implements Cloneable, JobEntryInterface {
    /** Class handed to {@link BaseMessages} to locate this entry's message bundle. */
    private static Class<?> PKG = JobEntryXMLWellFormed.class; // for i18n purposes, needed by Translator2!!

    // --- Success-condition codes; compared with success_condition in getSuccessStatus() ---
    // NOTE(review): these are mutable instance fields although they are used as constants.

    /** Success when the number of well-formed files is at least {@code limitFiles}. */
    public String SUCCESS_IF_AT_LEAST_X_FILES_WELL_FORMED = "success_when_at_least";
    /** Success when the number of badly formed files is below {@code limitFiles}. */
    public String SUCCESS_IF_BAD_FORMED_FILES_LESS = "success_if_bad_formed_files_less";
    /** Success when {@code NrAllErrors} is zero. */
    public String SUCCESS_IF_NO_ERRORS = "success_if_no_errors";

    // --- Result-filename codes; compared with resultfilenames in checkOneFile() ---

    /** Every checked file is added to the result files. */
    public String ADD_ALL_FILENAMES = "all_filenames";
    /** Only well-formed files are added to the result files. */
    public String ADD_WELL_FORMED_FILES_ONLY = "only_well_formed_filenames";
    /** Only badly formed files are added to the result files. */
    public String ADD_BAD_FORMED_FILES_ONLY = "only_bad_formed_filenames";

    /** When true, execute() reads folder (column 0) and wildcard (column 1) from the previous result rows. */
    public boolean arg_from_previous;
    /** When true, files located below the base folder (not only directly in it) are checked too. */
    public boolean include_subfolders;

    /** Configured source files/folders; parallel to {@link #wildcard}. May be null (see constructor). */
    public String[] source_filefolder;
    /** Regular expression per source entry, matched against the file's string form; parallel to {@link #source_filefolder}. */
    public String[] wildcard;
    /** Unparsed limit (variables allowed); execute() converts it into {@code limitFiles}, defaulting to 10. */
    private String nr_errors_less_than;
    /** One of the SUCCESS_IF_* codes above. */
    private String success_condition;
    /** One of the ADD_* codes above. */
    private String resultfilenames;

    // --- Per-run counters maintained by execute() and the update*() helpers ---

    /** Always recomputed as NrErrors + NrBadFormed by updateAllErrors(). */
    int NrAllErrors = 0;
    /** Number of files that failed the well-formedness check. */
    int NrBadFormed = 0;
    /** Number of files that passed the well-formedness check. */
    int NrWellFormed = 0;
    /** Parsed value of nr_errors_less_than used by the success conditions. */
    int limitFiles = 0;
    /** Number of processing errors (missing source, unknown file type, exceptions). */
    int NrErrors = 0;

    /** Set by updateErrors() once the configured success condition can no longer be met. */
    boolean successConditionBroken = false;
    /** Ensures the "success condition broken" error is logged only once per run. */
    boolean successConditionBrokenExit = false;

    /**
     * Creates a job entry with the given name and the default settings: no sources configured,
     * arguments not taken from the previous result, sub-folders excluded, a limit of "10",
     * success only when there are no errors, and all checked files added to the result files.
     *
     * @param n name of the job entry
     */
    public JobEntryXMLWellFormed(String n) {
        super(n, "");

        // Source selection
        source_filefolder = null;
        wildcard = null;
        arg_from_previous = false;
        include_subfolders = false;

        // Success evaluation and result handling
        success_condition = SUCCESS_IF_NO_ERRORS;
        nr_errors_less_than = "10";
        resultfilenames = ADD_ALL_FILENAMES;
    }

    /**
     * Creates a job entry with an empty name and the same defaults as
     * {@link #JobEntryXMLWellFormed(String)}.
     */
    public JobEntryXMLWellFormed() {
        this("");
    }

    /**
     * Returns a copy of this job entry whose source/wildcard arrays are independent of the
     * original's, so that editing the copy cannot silently change this instance.
     *
     * @return the cloned job entry
     */
    public Object clone() {
        JobEntryXMLWellFormed je = (JobEntryXMLWellFormed) super.clone();
        // NOTE(review): super.clone() presumably performs a field-by-field (shallow) copy, which
        // would leave both instances pointing at the same arrays - copy them explicitly.
        if (source_filefolder != null) {
            je.source_filefolder = source_filefolder.clone();
        }
        if (wildcard != null) {
            je.wildcard = wildcard.clone();
        }
        return je;
    }

    /**
     * Serializes this entry's settings to XML: the base-class XML, followed by the scalar
     * options and one {@code <field>} element per configured source/wildcard pair.
     *
     * @return the XML fragment describing this job entry
     */
    public String getXML() {
        final String entryIndent = "      ";
        final String valueIndent = "          ";
        StringBuilder xml = new StringBuilder(300);

        xml.append(super.getXML());
        xml.append(entryIndent).append(XMLHandler.addTagValue("arg_from_previous", arg_from_previous));
        xml.append(entryIndent).append(XMLHandler.addTagValue("include_subfolders", include_subfolders));
        xml.append(entryIndent).append(XMLHandler.addTagValue("nr_errors_less_than", nr_errors_less_than));
        xml.append(entryIndent).append(XMLHandler.addTagValue("success_condition", success_condition));
        xml.append(entryIndent).append(XMLHandler.addTagValue("resultfilenames", resultfilenames));

        xml.append("      <fields>").append(Const.CR);
        int nrSources = (source_filefolder == null) ? 0 : source_filefolder.length;
        for (int idx = 0; idx < nrSources; idx++) {
            xml.append("        <field>").append(Const.CR);
            xml.append(valueIndent).append(XMLHandler.addTagValue("source_filefolder", source_filefolder[idx]));
            xml.append(valueIndent).append(XMLHandler.addTagValue("wildcard", wildcard[idx]));
            xml.append("        </field>").append(Const.CR);
        }
        xml.append("      </fields>").append(Const.CR);

        return xml.toString();
    }

    /**
     * Restores this entry's settings from the XML node produced by {@link #getXML()}.
     *
     * @param entrynode    XML node of this job entry
     * @param databases    available database connections (passed to the base class)
     * @param slaveServers available slave servers (passed to the base class)
     * @param rep          repository (not used here)
     * @param metaStore    metastore (not used here)
     * @throws KettleXMLException when the settings cannot be read
     */
    public void loadXML(Node entrynode, List<DatabaseMeta> databases, List<SlaveServer> slaveServers,
            Repository rep, IMetaStore metaStore) throws KettleXMLException {
        try {
            super.loadXML(entrynode, databases, slaveServers);

            // Boolean options are stored as "Y"/"N"; anything but "Y" (any case) means false.
            String argFlag = XMLHandler.getTagValue(entrynode, "arg_from_previous");
            arg_from_previous = "Y".equalsIgnoreCase(argFlag);
            String subfolderFlag = XMLHandler.getTagValue(entrynode, "include_subfolders");
            include_subfolders = "Y".equalsIgnoreCase(subfolderFlag);

            nr_errors_less_than = XMLHandler.getTagValue(entrynode, "nr_errors_less_than");
            success_condition = XMLHandler.getTagValue(entrynode, "success_condition");
            resultfilenames = XMLHandler.getTagValue(entrynode, "resultfilenames");

            // One <field> element per source/wildcard pair.
            Node fieldsNode = XMLHandler.getSubNode(entrynode, "fields");
            int fieldCount = XMLHandler.countNodes(fieldsNode, "field");
            source_filefolder = new String[fieldCount];
            wildcard = new String[fieldCount];

            int pos = 0;
            while (pos < fieldCount) {
                Node fieldNode = XMLHandler.getSubNodeByNr(fieldsNode, "field", pos);
                source_filefolder[pos] = XMLHandler.getTagValue(fieldNode, "source_filefolder");
                wildcard[pos] = XMLHandler.getTagValue(fieldNode, "wildcard");
                pos++;
            }
        } catch (KettleXMLException xe) {
            String message = BaseMessages.getString(PKG, "JobXMLWellFormed.Error.Exception.UnableLoadXML");
            throw new KettleXMLException(message, xe);
        }
    }

    /**
     * Restores this entry's settings from the repository.
     *
     * @param rep          repository to read from
     * @param metaStore    metastore (not used here)
     * @param id_jobentry  id of this job entry in the repository
     * @param databases    available database connections (not used here)
     * @param slaveServers available slave servers (not used here)
     * @throws KettleException when the attributes cannot be read
     */
    public void loadRep(Repository rep, IMetaStore metaStore, ObjectId id_jobentry, List<DatabaseMeta> databases,
            List<SlaveServer> slaveServers) throws KettleException {
        try {
            arg_from_previous = rep.getJobEntryAttributeBoolean(id_jobentry, "arg_from_previous");
            include_subfolders = rep.getJobEntryAttributeBoolean(id_jobentry, "include_subfolders");

            nr_errors_less_than = rep.getJobEntryAttributeString(id_jobentry, "nr_errors_less_than");
            success_condition = rep.getJobEntryAttributeString(id_jobentry, "success_condition");
            resultfilenames = rep.getJobEntryAttributeString(id_jobentry, "resultfilenames");

            // The number of stored "source_filefolder" attributes determines the size of both arrays.
            int sourceCount = rep.countNrJobEntryAttributes(id_jobentry, "source_filefolder");
            source_filefolder = new String[sourceCount];
            wildcard = new String[sourceCount];

            int pos = 0;
            while (pos < sourceCount) {
                source_filefolder[pos] = rep.getJobEntryAttributeString(id_jobentry, pos, "source_filefolder");
                wildcard[pos] = rep.getJobEntryAttributeString(id_jobentry, pos, "wildcard");
                pos++;
            }
        } catch (KettleException dbe) {
            String message = BaseMessages.getString(PKG, "JobXMLWellFormed.Error.Exception.UnableLoadRep")
                    + id_jobentry;
            throw new KettleException(message, dbe);
        }
    }

    /**
     * Stores this entry's settings in the repository.
     *
     * @param rep       repository to write to
     * @param metaStore metastore (not used here)
     * @param id_job    id of the job this entry belongs to
     * @throws KettleException when the attributes cannot be saved
     */
    public void saveRep(Repository rep, IMetaStore metaStore, ObjectId id_job) throws KettleException {
        try {
            rep.saveJobEntryAttribute(id_job, getObjectId(), "arg_from_previous", arg_from_previous);
            rep.saveJobEntryAttribute(id_job, getObjectId(), "include_subfolders", include_subfolders);
            rep.saveJobEntryAttribute(id_job, getObjectId(), "nr_errors_less_than", nr_errors_less_than);
            rep.saveJobEntryAttribute(id_job, getObjectId(), "success_condition", success_condition);
            rep.saveJobEntryAttribute(id_job, getObjectId(), "resultfilenames", resultfilenames);

            // Indexed attributes: one source_filefolder/wildcard pair per position.
            int sourceCount = (source_filefolder == null) ? 0 : source_filefolder.length;
            for (int pos = 0; pos < sourceCount; pos++) {
                rep.saveJobEntryAttribute(id_job, getObjectId(), pos, "source_filefolder", source_filefolder[pos]);
                rep.saveJobEntryAttribute(id_job, getObjectId(), pos, "wildcard", wildcard[pos]);
            }
        } catch (KettleDatabaseException dbe) {
            String message = BaseMessages.getString(PKG, "JobXMLWellFormed.Error.Exception.UnableSaveRep")
                    + id_job;
            throw new KettleException(message, dbe);
        }
    }

    /**
     * Checks every configured (or row-supplied) file/folder for XML well-formedness and reports
     * the outcome on the given result object.
     * <p>
     * The result is marked successful only when the configured success condition holds after all
     * sources were processed. Processing stops early as soon as the success condition is known to
     * be broken; in that case the result stays failed and carries the counters collected so far.
     *
     * @param previousResult result of the previous job entry; it is updated and returned
     * @param nr             job entry number (not used here)
     * @return the updated {@code previousResult}
     * @throws KettleException when a value cannot be read from a previous result row
     */
    public Result execute(Result previousResult, int nr) throws KettleException {
        Result result = previousResult;
        // Pessimistic default: the entry counts as failed until the success condition is confirmed.
        result.setNrErrors(1);
        result.setResult(false);

        List<RowMetaAndData> rows = result.getRows();

        // Reset ALL per-run counters. NrAllErrors must be reset as well: it is only recomputed when
        // an error or a badly formed file is seen, so a stale value from an earlier run of this same
        // instance would otherwise make an error-free run fail.
        NrErrors = 0;
        NrAllErrors = 0;
        NrWellFormed = 0;
        NrBadFormed = 0;
        limitFiles = Const.toInt(environmentSubstitute(getNrErrorsLessThan()), 10);
        successConditionBroken = false;
        successConditionBrokenExit = false;

        // Get source files/folders and their wildcards
        String[] vsourcefilefolder = source_filefolder;
        String[] vwildcard = wildcard;

        if (arg_from_previous && log.isDetailed()) {
            logDetailed(BaseMessages.getString(PKG, "JobXMLWellFormed.Log.ArgFromPrevious.Found",
                    (rows != null ? rows.size() : 0) + ""));
        }

        if (arg_from_previous && rows != null) {
            // Sources come from the previous entry: column 0 = file/folder, column 1 = wildcard.
            for (int iteration = 0; iteration < rows.size() && !parentJob.isStopped(); iteration++) {
                if (successConditionBroken) {
                    return abortOnBrokenSuccessCondition(result);
                }

                RowMetaAndData resultRow = rows.get(iteration);
                String vsourcefilefolder_previous = resultRow.getString(0, null);
                String vwildcard_previous = resultRow.getString(1, null);

                if (log.isDetailed()) {
                    logDetailed(BaseMessages.getString(PKG, "JobXMLWellFormed.Log.ProcessingRow",
                            vsourcefilefolder_previous, vwildcard_previous));
                }

                processFileFolder(vsourcefilefolder_previous, vwildcard_previous, parentJob, result);
            }
        } else if (vsourcefilefolder != null) {
            for (int i = 0; i < vsourcefilefolder.length && !parentJob.isStopped(); i++) {
                if (successConditionBroken) {
                    return abortOnBrokenSuccessCondition(result);
                }

                // Tolerate a missing or shorter wildcard array: no wildcard means "match every file".
                String currentWildcard = (vwildcard != null && i < vwildcard.length) ? vwildcard[i] : null;

                if (log.isDetailed()) {
                    logDetailed(BaseMessages.getString(PKG, "JobXMLWellFormed.Log.ProcessingRow",
                            vsourcefilefolder[i], currentWildcard));
                }

                processFileFolder(vsourcefilefolder[i], currentWildcard, parentJob, result);
            }
        }

        // Success Condition
        result.setNrErrors(NrAllErrors);
        result.setNrLinesRejected(NrBadFormed);
        result.setNrLinesWritten(NrWellFormed);
        if (getSuccessStatus()) {
            result.setNrErrors(0);
            result.setResult(true);
        }

        displayResults();

        return result;
    }

    /**
     * Finalizes the result when processing stops because the success condition is broken: logs
     * the reason once and copies the counters collected so far onto the result.
     */
    private Result abortOnBrokenSuccessCondition(Result result) {
        if (!successConditionBrokenExit) {
            logError(BaseMessages.getString(PKG, "JobXMLWellFormed.Error.SuccessConditionbroken",
                    "" + NrAllErrors));
            successConditionBrokenExit = true;
        }
        // Report the error count as the number of errors (as the normal exit path does) instead of
        // overwriting the result's entry number with it.
        result.setNrErrors(NrAllErrors);
        result.setNrLinesRejected(NrBadFormed);
        result.setNrLinesWritten(NrWellFormed);
        return result;
    }

    /**
     * Writes the error, badly-formed and well-formed counters to the log when detailed logging
     * is enabled; does nothing otherwise.
     */
    private void displayResults() {
        if (!log.isDetailed()) {
            return;
        }

        final String separator = "=======================================";
        String errorLine = BaseMessages.getString(PKG, "JobXMLWellFormed.Log.Info.FilesInError", "" + NrErrors);
        String badLine = BaseMessages.getString(PKG, "JobXMLWellFormed.Log.Info.FilesInBadFormed",
                "" + NrBadFormed);
        String wellLine = BaseMessages.getString(PKG, "JobXMLWellFormed.Log.Info.FilesInWellFormed",
                "" + NrWellFormed);

        logDetailed(separator);
        logDetailed(errorLine);
        logDetailed(badLine);
        logDetailed(wellLine);
        logDetailed(separator);
    }

    /**
     * Tells whether the configured success condition can no longer be satisfied given the current
     * counters.
     * <p>
     * The comparisons are written constant-first so that a missing (null) success condition yields
     * {@code false} instead of a NullPointerException.
     *
     * @return true when at least one error occurred under "no errors", or when the number of badly
     *         formed files reached the limit under "bad formed files less than"
     */
    private boolean checkIfSuccessConditionBroken() {
        String condition = getSuccessCondition();

        boolean noErrorsViolated = NrAllErrors > 0 && SUCCESS_IF_NO_ERRORS.equals(condition);
        boolean badFormedLimitReached = NrBadFormed >= limitFiles
                && SUCCESS_IF_BAD_FORMED_FILES_LESS.equals(condition);

        return noErrorsViolated || badFormedLimitReached;
    }

    /**
     * Evaluates the configured success condition against the current counters.
     * <p>
     * The comparisons are written constant-first so that a missing (null) success condition yields
     * {@code false} (the entry is reported as failed) instead of a NullPointerException.
     *
     * @return true when the configured condition is met: no errors at all, at least
     *         {@code limitFiles} well-formed files, or fewer than {@code limitFiles} badly formed files
     */
    private boolean getSuccessStatus() {
        String condition = getSuccessCondition();

        if (SUCCESS_IF_NO_ERRORS.equals(condition)) {
            if (NrAllErrors == 0) {
                return true;
            }
        }
        if (SUCCESS_IF_AT_LEAST_X_FILES_WELL_FORMED.equals(condition)) {
            if (NrWellFormed >= limitFiles) {
                return true;
            }
        }
        // No early return above: the condition codes are public mutable fields, so every rule is
        // evaluated independently, exactly like the original OR-expression.
        return SUCCESS_IF_BAD_FORMED_FILES_LESS.equals(condition) && NrBadFormed < limitFiles;
    }

    /**
     * Registers one processing error, refreshes the total error count and latches
     * {@code successConditionBroken} when the success condition is now violated.
     */
    private void updateErrors() {
        NrErrors += 1;
        updateAllErrors();
        // Non-short-circuit OR-assignment: the check always runs, and the flag is only ever raised.
        successConditionBroken |= checkIfSuccessConditionBroken();
    }

    /** Recomputes the total error count as the sum of badly formed files and processing errors. */
    private void updateAllErrors() {
        NrAllErrors = NrBadFormed + NrErrors;
    }

    /**
     * SAX handler that adds nothing to {@link DefaultHandler}.
     * NOTE(review): not referenced anywhere in this class; it is public, so it may be used
     * elsewhere - confirm before removing.
     */
    public static class XMLTreeHandler extends DefaultHandler {

    }

    /**
     * Runs the well-formedness check on a single file.
     *
     * @param file file to check
     * @return the verdict of {@code XMLCheck.isXMLFileWellFormed}; false when the check itself
     *         throws (the failure is logged as an error)
     */
    private boolean CheckFile(FileObject file) {
        try {
            return XMLCheck.isXMLFileWellFormed(file);
        } catch (Exception e) {
            String message = BaseMessages.getString(PKG, "JobXMLWellFormed.Log.ErrorCheckingFile",
                    file.toString(), e.getMessage());
            logError(message);
            return false;
        }
    }

    /**
     * Checks one source entry: either a single file, or every matching file of a folder
     * (including sub-folders only when {@code include_subfolders} is set).
     * <p>
     * Problems with the source itself (empty name, missing source, unknown file type, any
     * exception) are logged and counted through {@link #updateErrors()}. Folder processing stops as
     * soon as the success condition is broken or the parent job is stopped.
     *
     * @param sourcefilefoldername file or folder name; variables are substituted
     * @param wildcard             regular expression applied to each file's string form in a folder;
     *                             empty or null matches every file; variables are substituted
     * @param parentJob            job this entry runs in
     * @param result               result object that receives the result files
     * @return the value returned by {@code checkOneFile} when the source is a single file;
     *         false in every other case
     */
    private boolean processFileFolder(String sourcefilefoldername, String wildcard, Job parentJob, Result result) {
        boolean entrystatus = false;
        FileObject sourcefilefolder = null;

        // Get real source file and wildcard
        String realSourceFilefoldername = environmentSubstitute(sourcefilefoldername);
        if (Const.isEmpty(realSourceFilefoldername)) {
            logError(BaseMessages.getString(PKG, "JobXMLWellFormed.log.FileFolderEmpty", sourcefilefoldername));
            // Update Errors
            updateErrors();

            return entrystatus;
        }
        String realWildcard = environmentSubstitute(wildcard);

        try {
            sourcefilefolder = KettleVFS.getFileObject(realSourceFilefoldername, this);

            if (!sourcefilefolder.exists()) {
                logError(BaseMessages.getString(PKG, "JobXMLWellFormed.Error.SourceFileNotExists",
                        realSourceFilefoldername));
                // Update Errors
                updateErrors();
                return entrystatus;
            }

            if (log.isDetailed()) {
                logDetailed(BaseMessages.getString(PKG, "JobXMLWellFormed.Log.FileExists",
                        sourcefilefolder.toString()));
            }

            if (sourcefilefolder.getType() == FileType.FILE) {
                entrystatus = checkOneFile(sourcefilefolder, result, parentJob);

            } else if (sourcefilefolder.getType() == FileType.FOLDER) {
                FileObject[] fileObjects = sourcefilefolder.findFiles(new AllFileSelector() {
                    public boolean traverseDescendents(FileSelectInfo info) {
                        // Only descend below the base folder when sub-folders are requested; files
                        // found deeper would be skipped by the loop below anyway.
                        return info.getDepth() == 0 || include_subfolders;
                    }

                    public boolean includeFile(FileSelectInfo info) {

                        FileObject fileObject = info.getFile();
                        try {
                            if (fileObject == null) {
                                return false;
                            }
                            if (fileObject.getType() != FileType.FILE) {
                                return false;
                            }
                        } catch (Exception ex) {
                            // Upon error don't process the file.
                            return false;
                        } finally {
                            if (fileObject != null) {
                                try {
                                    fileObject.close();
                                } catch (IOException ex) {
                                    /* Ignore */
                                }
                            }

                        }
                        return true;
                    }
                });

                if (fileObjects != null) {
                    for (int j = 0; j < fileObjects.length && !parentJob.isStopped(); j++) {
                        if (successConditionBroken) {
                            if (!successConditionBrokenExit) {
                                logError(BaseMessages.getString(PKG,
                                        "JobXMLWellFormed.Error.SuccessConditionbroken", "" + NrAllErrors));
                                successConditionBrokenExit = true;
                            }
                            return false;
                        }

                        // Fetch files in list one after one ...
                        FileObject currentFile = fileObjects[j];
                        try {
                            boolean inBaseFolder = currentFile.getParent().toString()
                                    .equals(sourcefilefolder.toString());
                            // Files outside the base folder are only checked when sub-folders are included.
                            if ((inBaseFolder || include_subfolders)
                                    && GetFileWildcard(currentFile.toString(), realWildcard)) {
                                checkOneFile(currentFile, result, parentJob);
                            }
                        } finally {
                            // Release every visited file, not just the last one of the loop.
                            closeQuietly(currentFile);
                        }
                    }
                }
            } else {
                logError(BaseMessages.getString(PKG, "JobXMLWellFormed.Error.UnknowFileFormat",
                        sourcefilefolder.toString()));
                // Update Errors
                updateErrors();
            }
        } catch (Exception e) {
            logError(BaseMessages.getString(PKG, "JobXMLWellFormed.Error.Exception.Processing",
                    realSourceFilefoldername.toString(), e));
            // Update Errors
            updateErrors();
        } finally {
            closeQuietly(sourcefilefolder);
        }
        return entrystatus;
    }

    /** Closes the given file object if it is not null; a failure to close is deliberately ignored. */
    private void closeQuietly(FileObject file) {
        if (file == null) {
            return;
        }
        try {
            file.close();
        } catch (IOException ignored) {
            // Best effort: nothing useful can be done when releasing the file fails.
        }
    }

    /**
     * Checks a single file, updates the well-formed/badly-formed counters and adds the file to the
     * result files when the configured result-filename option asks for it.
     *
     * @param file      file to check
     * @param result    result object that receives the result files
     * @param parentJob job this entry runs in
     * @return true when the file is well formed, false otherwise (previously this method always
     *         returned false because the verdict was never stored)
     * @throws KettleException when the file cannot be verified
     */
    private boolean checkOneFile(FileObject file, Result result, Job parentJob) throws KettleException {
        try {
            // We deal with a file..so let's check if it's well formed
            boolean wellFormed = CheckFile(file);

            // Constant-first comparisons: a missing (null) option adds no file instead of failing
            // with a NullPointerException after the counters were already updated.
            boolean addAll = ADD_ALL_FILENAMES.equals(resultfilenames);

            if (wellFormed) {
                if (log.isDetailed()) {
                    logDetailed("---------------------------");
                    logDetailed(
                            BaseMessages.getString(PKG, "JobXMLWellFormed.Error.FileWellFormed", file.toString()));
                }
                // Update Well formed files number
                updateWellFormed();
                if (addAll || ADD_WELL_FORMED_FILES_ONLY.equals(resultfilenames)) {
                    addFileToResultFilenames(KettleVFS.getFilename(file), result, parentJob);
                }
            } else {
                logError(BaseMessages.getString(PKG, "JobXMLWellFormed.Error.FileBadFormed", file.toString()));
                // Update Bad formed files number
                updateBadFormed();
                if (addAll || ADD_BAD_FORMED_FILES_ONLY.equals(resultfilenames)) {
                    addFileToResultFilenames(KettleVFS.getFilename(file), result, parentJob);
                }
            }

            return wellFormed;
        } catch (Exception e) {
            throw new KettleException("Unable to verify file '" + file + "'", e);
        }
    }

    /** Counts one more well-formed file. */
    private void updateWellFormed() {
        NrWellFormed += 1;
    }

    /** Counts one more badly formed file and refreshes the total error count. */
    private void updateBadFormed() {
        NrBadFormed += 1;
        updateAllErrors();
    }

    /**
     * Registers the given file in the result's file list, keyed by the file's string form.
     * Any failure is logged as an error and otherwise ignored.
     *
     * @param fileaddentry name of the file to add
     * @param result       result object whose result files are extended
     * @param parentJob    job this entry runs in; its name is stored with the result file
     */
    private void addFileToResultFilenames(String fileaddentry, Result result, Job parentJob) {
        try {
            ResultFile addedFile = new ResultFile(ResultFile.FILE_TYPE_GENERAL,
                    KettleVFS.getFileObject(fileaddentry, this), parentJob.getJobname(), toString());
            result.getResultFiles().put(addedFile.getFile().toString(), addedFile);

            if (!log.isDetailed()) {
                return;
            }
            logDetailed(BaseMessages.getString(PKG, "JobXMLWellFormed.Log.FileAddedToResultFilesName",
                    fileaddentry));
        } catch (Exception e) {
            String message = BaseMessages.getString(PKG, "JobXMLWellFormed.Error.AddingToFilenameResult",
                    fileaddentry, e.getMessage());
            logError(message);
        }
    }

    /**
     * Tests a file name against a wildcard given as a regular expression.
     *
     * @param selectedfile string to test; the whole string must match the expression
     * @param wildcard     regular expression; empty or null accepts every file
     * @return True if the selectedfile matches the wildcard
     */
    private boolean GetFileWildcard(String selectedfile, String wildcard) {
        // No wildcard configured: every file qualifies.
        if (Const.isEmpty(wildcard)) {
            return true;
        }
        return Pattern.compile(wildcard).matcher(selectedfile).matches();
    }

    /**
     * Sets whether files located in sub-folders of a source folder are checked as well.
     *
     * @param include_subfoldersin new value of the include-subfolders flag
     */
    public void setIncludeSubfolders(boolean include_subfoldersin) {
        this.include_subfolders = include_subfoldersin;
    }

    /**
     * Sets whether the sources are taken from the rows of the previous result instead of the
     * configured source list.
     *
     * @param argfrompreviousin new value of the arguments-from-previous flag
     */
    public void setArgFromPrevious(boolean argfrompreviousin) {
        this.arg_from_previous = argfrompreviousin;
    }

    /**
     * Sets the limit used by the count-based success conditions.
     *
     * @param nr_errors_less_than limit as text; execute() substitutes variables and falls back
     *                            to 10 when it is not a number
     */
    public void setNrErrorsLessThan(String nr_errors_less_than) {
        this.nr_errors_less_than = nr_errors_less_than;
    }

    /**
     * @return the unparsed limit used by the count-based success conditions
     */
    public String getNrErrorsLessThan() {
        return nr_errors_less_than;
    }

    /**
     * Sets the success condition.
     *
     * @param success_condition one of the SUCCESS_IF_* codes of this class
     */
    public void setSuccessCondition(String success_condition) {
        this.success_condition = success_condition;
    }

    /**
     * @return the configured success condition code; may be null when it was never set or was
     *         missing from the loaded definition
     */
    public String getSuccessCondition() {
        return success_condition;
    }

    /**
     * Sets which checked files are added to the result files.
     *
     * @param resultfilenames one of the ADD_* codes of this class
     */
    public void setResultFilenames(String resultfilenames) {
        this.resultfilenames = resultfilenames;
    }

    /**
     * @return the code selecting which checked files are added to the result files
     */
    public String getResultFilenames() {
        return resultfilenames;
    }

    /**
     * Always returns true.
     * NOTE(review): presumably tells the job engine that this entry produces a true/false
     * outcome - confirm against JobEntryInterface.
     */
    public boolean evaluates() {
        return true;
    }

    /**
     * Validates this entry's configuration and appends the findings to {@code remarks}.
     * <p>
     * First the "arguments" property is required to be non-null; when that passes, each
     * "arguments[i]" is validated to be non-null and to reference an existing file, once per
     * configured source.
     *
     * @param remarks    list receiving the validation results
     * @param jobMeta    job metadata (not used here)
     * @param space      variable space (not used here; the entry's own variables are used)
     * @param repository repository (not used here)
     * @param metaStore  metastore (not used here)
     */
    public void check(List<CheckResultInterface> remarks, JobMeta jobMeta, VariableSpace space,
            Repository repository, IMetaStore metaStore) {
        boolean res = andValidator().validate(this, "arguments", remarks, putValidators(notNullValidator()));

        if (!res) {
            return;
        }

        // The constructor leaves source_filefolder null until sources are configured; without this
        // guard the loop below would fail with a NullPointerException.
        if (source_filefolder == null) {
            return;
        }

        ValidatorContext ctx = new ValidatorContext();
        putVariableSpace(ctx, getVariables());
        putValidators(ctx, notNullValidator(), fileExistsValidator());

        for (int i = 0; i < source_filefolder.length; i++) {
            andValidator().validate(this, "arguments[" + i + "]", remarks, ctx);
        }
    }

}