Java tutorial
/* Copyright (c) 2007 Pentaho Corporation. All rights reserved. * This software was developed by Pentaho Corporation and is provided under the terms * of the GNU Lesser General Public License, Version 2.1. You may not use * this file except in compliance with the license. If you need a copy of the license, * please go to http://www.gnu.org/licenses/lgpl-2.1.txt. The Original Code is Pentaho * Data Integration. The Initial Developer is Samatar HASSAN. * * Software distributed under the GNU Lesser Public License is distributed on an "AS IS" * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. Please refer to * the license for the specific language governing your rights and limitations.*/ package be.ibridge.kettle.jsoup; import java.util.ArrayList; import java.util.List; import java.util.Map; import org.apache.commons.vfs.FileObject; import org.pentaho.di.core.CheckResult; import org.pentaho.di.core.CheckResultInterface; import org.pentaho.di.core.Const; import org.pentaho.di.core.Counter; import org.pentaho.di.core.database.DatabaseMeta; import org.pentaho.di.core.exception.KettleException; import org.pentaho.di.core.exception.KettleStepException; import org.pentaho.di.core.exception.KettleXMLException; import org.pentaho.di.core.fileinput.FileInputList; import org.pentaho.di.core.row.RowMetaInterface; import org.pentaho.di.core.row.ValueMeta; import org.pentaho.di.core.row.ValueMetaInterface; import org.pentaho.di.core.variables.VariableSpace; import org.pentaho.di.core.xml.XMLHandler; import org.pentaho.di.i18n.BaseMessages; import org.pentaho.di.repository.ObjectId; import org.pentaho.di.repository.Repository; import org.pentaho.di.resource.ResourceDefinition; import org.pentaho.di.resource.ResourceNamingInterface; import org.pentaho.di.trans.Trans; import org.pentaho.di.trans.TransMeta; import org.pentaho.di.trans.step.BaseStepMeta; import org.pentaho.di.trans.step.StepDataInterface; import org.pentaho.di.trans.step.StepInterface; import org.pentaho.di.trans.step.StepMeta; import org.pentaho.di.trans.step.StepMetaInterface; import org.w3c.dom.Node; /** * Store run-time data on the JsoupInput step. */ public class JsoupInputMeta extends BaseStepMeta implements StepMetaInterface { private static Class<?> PKG = JsoupInputMeta.class; // for i18n purposes, needed by Translator2!! $NON-NLS-1$ private static final String YES = "Y"; public static final String[] RequiredFilesDesc = new String[] { BaseMessages.getString(PKG, "System.Combo.No"), BaseMessages.getString(PKG, "System.Combo.Yes") }; public static final String[] RequiredFilesCode = new String[] { "N", "Y" }; /** Array of filenames */ private String fileName[]; /** Wildcard or filemask (regular expression) */ private String fileMask[]; /** Array of boolean values as string, indicating if a file is required. */ private String fileRequired[]; /** Wildcard or filemask to exclude (regular expression) */ private String excludeFileMask[]; /** Flag indicating that we should include the filename in the output */ private boolean includeFilename; /** The name of the field in the output containing the filename */ private String filenameField; /** Flag indicating that a row number field should be included in the output */ private boolean includeRowNumber; /** The name of the field in the output containing the row number*/ private String rowNumberField; /** The maximum number or lines to read */ private long rowLimit; /** The fields to import... */ private JsoupInputField inputFields[]; /** Is In fields */ private String valueField; /** Is In fields */ private boolean inFields; /** Is a File */ private boolean IsAFile; /** Flag: add result filename **/ private boolean addResultFile; /** Flag : do we ignore empty files */ private boolean IsIgnoreEmptyFile; /** Array of boolean values as string, indicating if we need to fetch sub folders. */ private String includeSubFolders[]; /** Flag : do not fail if no file */ private boolean doNotFailIfNoFile; /** Flag : read url as source */ private boolean readurl; /** Additional fields **/ private String shortFileFieldName; private String pathFieldName; private String hiddenFieldName; private String lastModificationTimeFieldName; private String uriNameFieldName; private String rootUriNameFieldName; private String extensionFieldName; private String sizeFieldName; public JsoupInputMeta() { super(); // allocate BaseStepMeta } /** * @return Returns the shortFileFieldName. */ public String getShortFileNameField() { return shortFileFieldName; } /** * @param field The shortFileFieldName to set. */ public void setShortFileNameField(String field) { shortFileFieldName = field; } /** * @return Returns the pathFieldName. */ public String getPathField() { return pathFieldName; } /** * @param field The pathFieldName to set. */ public void setPathField(String field) { this.pathFieldName = field; } /** * @return Returns the hiddenFieldName. */ public String isHiddenField() { return hiddenFieldName; } /** * @param field The hiddenFieldName to set. */ public void setIsHiddenField(String field) { hiddenFieldName = field; } /** * @return Returns the lastModificationTimeFieldName. */ public String getLastModificationDateField() { return lastModificationTimeFieldName; } /** * @param field The lastModificationTimeFieldName to set. */ public void setLastModificationDateField(String field) { lastModificationTimeFieldName = field; } /** * @return Returns the uriNameFieldName. */ public String getUriField() { return uriNameFieldName; } /** * @param field The uriNameFieldName to set. */ public void setUriField(String field) { uriNameFieldName = field; } /** * @return Returns the uriNameFieldName. */ public String getRootUriField() { return rootUriNameFieldName; } /** * @param field The rootUriNameFieldName to set. */ public void setRootUriField(String field) { rootUriNameFieldName = field; } /** * @return Returns the extensionFieldName. */ public String getExtensionField() { return extensionFieldName; } /** * @param field The extensionFieldName to set. */ public void setExtensionField(String field) { extensionFieldName = field; } /** * @return Returns the sizeFieldName. */ public String getSizeField() { return sizeFieldName; } /** * @param field The sizeFieldName to set. */ public void setSizeField(String field) { sizeFieldName = field; } /** * @return the add result filesname flag */ public boolean addResultFile() { return addResultFile; } /** * @return the readurl flag */ public boolean isReadUrl() { return readurl; } /** * @param readurl the readurl flag to set */ public void setReadUrl(boolean readurl) { this.readurl = readurl; } public void setAddResultFile(boolean addResultFile) { this.addResultFile = addResultFile; } /** * @return Returns the input fields. */ public JsoupInputField[] getInputFields() { return inputFields; } /** * @param inputFields The input fields to set. */ public void setInputFields(JsoupInputField[] inputFields) { this.inputFields = inputFields; } /** * @return Returns the excludeFileMask. */ public String[] getExludeFileMask() { return excludeFileMask; } /** * @param excludeFileMask The excludeFileMask to set. */ public void setExcludeFileMask(String[] excludeFileMask) { this.excludeFileMask = excludeFileMask; } /** * Get field value. */ public String getFieldValue() { return valueField; } /** * Set field field. */ public void setFieldValue(String value) { this.valueField = value; } /** * Get the IsInFields. */ public boolean isInFields() { return inFields; } /** * @param inFields set the inFields. */ public void setInFields(boolean inFields) { this.inFields = inFields; } /** * @return Returns the fileMask. */ public String[] getFileMask() { return fileMask; } /** * @param fileMask The fileMask to set. */ public void setFileMask(String[] fileMask) { this.fileMask = fileMask; } public String[] getFileRequired() { return fileRequired; } public void setFileRequired(String[] fileRequiredin) { for (int i = 0; i < fileRequiredin.length; i++) { this.fileRequired[i] = getRequiredFilesCode(fileRequiredin[i]); } } public void setIncludeSubFolders(String[] includeSubFoldersin) { for (int i = 0; i < includeSubFoldersin.length; i++) { this.includeSubFolders[i] = getRequiredFilesCode(includeSubFoldersin[i]); } } /** * @return Returns the fileName. */ public String[] getFileName() { return fileName; } /** * @param fileName The fileName to set. */ public void setFileName(String[] fileName) { this.fileName = fileName; } /** * @return Returns the filenameField. */ public String getFilenameField() { return filenameField; } /** * @param filenameField The filenameField to set. */ public void setFilenameField(String filenameField) { this.filenameField = filenameField; } /** * @return Returns the includeFilename. */ public boolean includeFilename() { return includeFilename; } /** * @param includeFilename The includeFilename to set. */ public void setIncludeFilename(boolean includeFilename) { this.includeFilename = includeFilename; } /** * @return Returns the includeRowNumber. */ public boolean includeRowNumber() { return includeRowNumber; } /** * @param includeRowNumber The includeRowNumber to set. */ public void setIncludeRowNumber(boolean includeRowNumber) { this.includeRowNumber = includeRowNumber; } /** * @return Returns the rowLimit. */ public long getRowLimit() { return rowLimit; } /** * @param rowLimit The rowLimit to set. */ public void setRowLimit(long rowLimit) { this.rowLimit = rowLimit; } /** * @return the IsIgnoreEmptyFile flag */ public boolean isIgnoreEmptyFile() { return IsIgnoreEmptyFile; } /** * @param IsIgnoreEmptyFile the IsIgnoreEmptyFile to set */ public void setIgnoreEmptyFile(boolean IsIgnoreEmptyFile) { this.IsIgnoreEmptyFile = IsIgnoreEmptyFile; } /** * @return the doNotFailIfNoFile flag */ public boolean isdoNotFailIfNoFile() { return doNotFailIfNoFile; } /** * @param doNotFailIfNoFile the doNotFailIfNoFile to set */ public void setdoNotFailIfNoFile(boolean doNotFailIfNoFile) { this.doNotFailIfNoFile = doNotFailIfNoFile; } /** * @return Returns the rowNumberField. */ public String getRowNumberField() { return rowNumberField; } /** * @param rowNumberField The rowNumberField to set. */ public void setRowNumberField(String rowNumberField) { this.rowNumberField = rowNumberField; } public boolean getIsAFile() { return IsAFile; } public void setIsAFile(boolean IsAFile) { this.IsAFile = IsAFile; } public String[] getIncludeSubFolders() { return includeSubFolders; } public void loadXML(Node stepnode, List<DatabaseMeta> databases, Map<String, Counter> counters) throws KettleXMLException { readData(stepnode); } public Object clone() { JsoupInputMeta retval = (JsoupInputMeta) super.clone(); int nrFiles = fileName.length; int nrFields = inputFields.length; retval.allocate(nrFiles, nrFields); for (int i = 0; i < nrFiles; i++) { retval.fileName[i] = fileName[i]; retval.fileMask[i] = fileMask[i]; retval.excludeFileMask[i] = excludeFileMask[i]; retval.fileRequired[i] = fileRequired[i]; retval.includeSubFolders[i] = includeSubFolders[i]; } for (int i = 0; i < nrFields; i++) { if (inputFields[i] != null) { retval.inputFields[i] = (JsoupInputField) inputFields[i].clone(); } } return retval; } public String getXML() { StringBuffer retval = new StringBuffer(400); retval.append(" ").append(XMLHandler.addTagValue("include", includeFilename)); retval.append(" ").append(XMLHandler.addTagValue("include_field", filenameField)); retval.append(" ").append(XMLHandler.addTagValue("rownum", includeRowNumber)); retval.append(" ").append(XMLHandler.addTagValue("addresultfile", addResultFile)); retval.append(" ").append(XMLHandler.addTagValue("readurl", readurl)); retval.append(" " + XMLHandler.addTagValue("IsIgnoreEmptyFile", IsIgnoreEmptyFile)); retval.append(" " + XMLHandler.addTagValue("doNotFailIfNoFile", doNotFailIfNoFile)); retval.append(" ").append(XMLHandler.addTagValue("rownum_field", rowNumberField)); retval.append(" <file>").append(Const.CR); for (int i = 0; i < fileName.length; i++) { retval.append(" ").append(XMLHandler.addTagValue("name", fileName[i])); retval.append(" ").append(XMLHandler.addTagValue("filemask", fileMask[i])); retval.append(" ").append(XMLHandler.addTagValue("exclude_filemask", excludeFileMask[i])); retval.append(" ").append(XMLHandler.addTagValue("file_required", fileRequired[i])); retval.append(" ").append(XMLHandler.addTagValue("include_subfolders", includeSubFolders[i])); } retval.append(" </file>").append(Const.CR); retval.append(" <fields>").append(Const.CR); for (int i = 0; i < inputFields.length; i++) { JsoupInputField field = inputFields[i]; retval.append(field.getXML()); } retval.append(" </fields>").append(Const.CR); retval.append(" ").append(XMLHandler.addTagValue("limit", rowLimit)); retval.append(" ").append(XMLHandler.addTagValue("IsInFields", inFields)); retval.append(" ").append(XMLHandler.addTagValue("IsAFile", IsAFile)); retval.append(" ").append(XMLHandler.addTagValue("valueField", valueField)); retval.append(" ").append(XMLHandler.addTagValue("shortFileFieldName", shortFileFieldName)); retval.append(" ").append(XMLHandler.addTagValue("pathFieldName", pathFieldName)); retval.append(" ").append(XMLHandler.addTagValue("hiddenFieldName", hiddenFieldName)); retval.append(" ") .append(XMLHandler.addTagValue("lastModificationTimeFieldName", lastModificationTimeFieldName)); retval.append(" ").append(XMLHandler.addTagValue("uriNameFieldName", uriNameFieldName)); retval.append(" ").append(XMLHandler.addTagValue("rootUriNameFieldName", rootUriNameFieldName)); retval.append(" ").append(XMLHandler.addTagValue("extensionFieldName", extensionFieldName)); retval.append(" ").append(XMLHandler.addTagValue("sizeFieldName", sizeFieldName)); return retval.toString(); } public String getRequiredFilesDesc(String tt) { if (Const.isEmpty(tt)) return RequiredFilesDesc[0]; if (tt.equalsIgnoreCase(RequiredFilesCode[1])) return RequiredFilesDesc[1]; else return RequiredFilesDesc[0]; } public String getRequiredFilesCode(String tt) { if (tt == null) return RequiredFilesCode[0]; if (tt.equals(RequiredFilesDesc[1])) return RequiredFilesCode[1]; else return RequiredFilesCode[0]; } private void readData(Node stepnode) throws KettleXMLException { try { includeFilename = "Y".equalsIgnoreCase(XMLHandler.getTagValue(stepnode, "include")); filenameField = XMLHandler.getTagValue(stepnode, "include_field"); addResultFile = "Y".equalsIgnoreCase(XMLHandler.getTagValue(stepnode, "addresultfile")); readurl = "Y".equalsIgnoreCase(XMLHandler.getTagValue(stepnode, "readurl")); IsIgnoreEmptyFile = "Y".equalsIgnoreCase(XMLHandler.getTagValue(stepnode, "IsIgnoreEmptyFile")); doNotFailIfNoFile = "Y".equalsIgnoreCase(XMLHandler.getTagValue(stepnode, "doNotFailIfNoFile")); includeRowNumber = "Y".equalsIgnoreCase(XMLHandler.getTagValue(stepnode, "rownum")); rowNumberField = XMLHandler.getTagValue(stepnode, "rownum_field"); Node filenode = XMLHandler.getSubNode(stepnode, "file"); Node fields = XMLHandler.getSubNode(stepnode, "fields"); int nrFiles = XMLHandler.countNodes(filenode, "name"); int nrFields = XMLHandler.countNodes(fields, "field"); allocate(nrFiles, nrFields); for (int i = 0; i < nrFiles; i++) { Node filenamenode = XMLHandler.getSubNodeByNr(filenode, "name", i); Node filemasknode = XMLHandler.getSubNodeByNr(filenode, "filemask", i); Node excludefilemasknode = XMLHandler.getSubNodeByNr(filenode, "exclude_filemask", i); Node fileRequirednode = XMLHandler.getSubNodeByNr(filenode, "file_required", i); Node includeSubFoldersnode = XMLHandler.getSubNodeByNr(filenode, "include_subfolders", i); fileName[i] = XMLHandler.getNodeValue(filenamenode); fileMask[i] = XMLHandler.getNodeValue(filemasknode); excludeFileMask[i] = XMLHandler.getNodeValue(excludefilemasknode); fileRequired[i] = XMLHandler.getNodeValue(fileRequirednode); includeSubFolders[i] = XMLHandler.getNodeValue(includeSubFoldersnode); } for (int i = 0; i < nrFields; i++) { Node fnode = XMLHandler.getSubNodeByNr(fields, "field", i); JsoupInputField field = new JsoupInputField(fnode); inputFields[i] = field; } // Is there a limit on the number of rows we process? rowLimit = Const.toLong(XMLHandler.getTagValue(stepnode, "limit"), 0L); inFields = "Y".equalsIgnoreCase(XMLHandler.getTagValue(stepnode, "IsInFields")); IsAFile = "Y".equalsIgnoreCase(XMLHandler.getTagValue(stepnode, "IsAFile")); valueField = XMLHandler.getTagValue(stepnode, "valueField"); shortFileFieldName = XMLHandler.getTagValue(stepnode, "shortFileFieldName"); pathFieldName = XMLHandler.getTagValue(stepnode, "pathFieldName"); hiddenFieldName = XMLHandler.getTagValue(stepnode, "hiddenFieldName"); lastModificationTimeFieldName = XMLHandler.getTagValue(stepnode, "lastModificationTimeFieldName"); uriNameFieldName = XMLHandler.getTagValue(stepnode, "uriNameFieldName"); rootUriNameFieldName = XMLHandler.getTagValue(stepnode, "rootUriNameFieldName"); extensionFieldName = XMLHandler.getTagValue(stepnode, "extensionFieldName"); sizeFieldName = XMLHandler.getTagValue(stepnode, "sizeFieldName"); } catch (Exception e) { throw new KettleXMLException( BaseMessages.getString(PKG, "JsoupInputMeta.Exception.ErrorLoadingXML", e.toString())); } } public void allocate(int nrfiles, int nrfields) { fileName = new String[nrfiles]; fileMask = new String[nrfiles]; excludeFileMask = new String[nrfiles]; fileRequired = new String[nrfiles]; includeSubFolders = new String[nrfiles]; inputFields = new JsoupInputField[nrfields]; } public void setDefault() { shortFileFieldName = null; pathFieldName = null; hiddenFieldName = null; lastModificationTimeFieldName = null; uriNameFieldName = null; rootUriNameFieldName = null; extensionFieldName = null; sizeFieldName = null; IsIgnoreEmptyFile = false; doNotFailIfNoFile = true; includeFilename = false; filenameField = ""; includeRowNumber = false; rowNumberField = ""; IsAFile = false; addResultFile = false; readurl = false; int nrFiles = 0; int nrFields = 0; allocate(nrFiles, nrFields); for (int i = 0; i < nrFiles; i++) { fileName[i] = "filename" + (i + 1); fileMask[i] = ""; excludeFileMask[i] = ""; fileRequired[i] = RequiredFilesCode[0]; includeSubFolders[i] = RequiredFilesCode[0]; } for (int i = 0; i < nrFields; i++) { inputFields[i] = new JsoupInputField("field" + (i + 1)); } rowLimit = 0; inFields = false; valueField = ""; } public void getFields(RowMetaInterface r, String name, RowMetaInterface info[], StepMeta nextStep, VariableSpace space) throws KettleStepException { int i; for (i = 0; i < inputFields.length; i++) { JsoupInputField field = inputFields[i]; int type = field.getType(); if (type == ValueMeta.TYPE_NONE) type = ValueMeta.TYPE_STRING; ValueMetaInterface v = new ValueMeta(space.environmentSubstitute(field.getName()), type); v.setLength(field.getLength()); v.setPrecision(field.getPrecision()); v.setOrigin(name); v.setConversionMask(field.getFormat()); v.setDecimalSymbol(field.getDecimalSymbol()); v.setGroupingSymbol(field.getGroupSymbol()); v.setCurrencySymbol(field.getCurrencySymbol()); r.addValueMeta(v); } if (includeFilename) { ValueMetaInterface v = new ValueMeta(space.environmentSubstitute(filenameField), ValueMeta.TYPE_STRING); v.setLength(250); v.setPrecision(-1); v.setOrigin(name); r.addValueMeta(v); } if (includeRowNumber) { ValueMetaInterface v = new ValueMeta(space.environmentSubstitute(rowNumberField), ValueMeta.TYPE_INTEGER); v.setLength(ValueMetaInterface.DEFAULT_INTEGER_LENGTH, 0); v.setOrigin(name); r.addValueMeta(v); } // Add additional fields if (getShortFileNameField() != null && getShortFileNameField().length() > 0) { ValueMetaInterface v = new ValueMeta(space.environmentSubstitute(getShortFileNameField()), ValueMeta.TYPE_STRING); v.setLength(100, -1); v.setOrigin(name); r.addValueMeta(v); } if (getExtensionField() != null && getExtensionField().length() > 0) { ValueMetaInterface v = new ValueMeta(space.environmentSubstitute(getExtensionField()), ValueMeta.TYPE_STRING); v.setLength(100, -1); v.setOrigin(name); r.addValueMeta(v); } if (getPathField() != null && getPathField().length() > 0) { ValueMetaInterface v = new ValueMeta(space.environmentSubstitute(getPathField()), ValueMeta.TYPE_STRING); v.setLength(100, -1); v.setOrigin(name); r.addValueMeta(v); } if (getSizeField() != null && getSizeField().length() > 0) { ValueMetaInterface v = new ValueMeta(space.environmentSubstitute(getSizeField()), ValueMeta.TYPE_INTEGER); v.setOrigin(name); v.setLength(9); r.addValueMeta(v); } if (isHiddenField() != null && isHiddenField().length() > 0) { ValueMetaInterface v = new ValueMeta(space.environmentSubstitute(isHiddenField()), ValueMeta.TYPE_BOOLEAN); v.setOrigin(name); r.addValueMeta(v); } if (getLastModificationDateField() != null && getLastModificationDateField().length() > 0) { ValueMetaInterface v = new ValueMeta(space.environmentSubstitute(getLastModificationDateField()), ValueMeta.TYPE_DATE); v.setOrigin(name); r.addValueMeta(v); } if (getUriField() != null && getUriField().length() > 0) { ValueMetaInterface v = new ValueMeta(space.environmentSubstitute(getUriField()), ValueMeta.TYPE_STRING); v.setLength(100, -1); v.setOrigin(name); r.addValueMeta(v); } if (getRootUriField() != null && getRootUriField().length() > 0) { ValueMetaInterface v = new ValueMeta(space.environmentSubstitute(getRootUriField()), ValueMeta.TYPE_STRING); v.setLength(100, -1); v.setOrigin(name); r.addValueMeta(v); } } public void readRep(Repository rep, ObjectId id_step, List<DatabaseMeta> databases, Map<String, Counter> counters) throws KettleException { try { includeFilename = rep.getStepAttributeBoolean(id_step, "include"); filenameField = rep.getStepAttributeString(id_step, "include_field"); addResultFile = rep.getStepAttributeBoolean(id_step, "addresultfile"); readurl = rep.getStepAttributeBoolean(id_step, "readurl"); IsIgnoreEmptyFile = rep.getStepAttributeBoolean(id_step, "IsIgnoreEmptyFile"); doNotFailIfNoFile = rep.getStepAttributeBoolean(id_step, "doNotFailIfNoFile"); includeRowNumber = rep.getStepAttributeBoolean(id_step, "rownum"); rowNumberField = rep.getStepAttributeString(id_step, "rownum_field"); rowLimit = rep.getStepAttributeInteger(id_step, "limit"); int nrFiles = rep.countNrStepAttributes(id_step, "file_name"); int nrFields = rep.countNrStepAttributes(id_step, "field_name"); allocate(nrFiles, nrFields); for (int i = 0; i < nrFiles; i++) { fileName[i] = rep.getStepAttributeString(id_step, i, "file_name"); fileMask[i] = rep.getStepAttributeString(id_step, i, "file_mask"); excludeFileMask[i] = rep.getStepAttributeString(id_step, i, "exclude_file_mask"); fileRequired[i] = rep.getStepAttributeString(id_step, i, "file_required"); includeSubFolders[i] = rep.getStepAttributeString(id_step, i, "include_subfolders"); } for (int i = 0; i < nrFields; i++) { JsoupInputField field = new JsoupInputField(); field.setName(rep.getStepAttributeString(id_step, i, "field_name")); field.setPath(rep.getStepAttributeString(id_step, i, "field_path")); field.setType(ValueMeta.getType(rep.getStepAttributeString(id_step, i, "field_type"))); field.setFormat(rep.getStepAttributeString(id_step, i, "field_format")); field.setCurrencySymbol(rep.getStepAttributeString(id_step, i, "field_currency")); field.setDecimalSymbol(rep.getStepAttributeString(id_step, i, "field_decimal")); field.setGroupSymbol(rep.getStepAttributeString(id_step, i, "field_group")); field.setLength((int) rep.getStepAttributeInteger(id_step, i, "field_length")); field.setPrecision((int) rep.getStepAttributeInteger(id_step, i, "field_precision")); field.setTrimType(JsoupInputField .getTrimTypeByCode(rep.getStepAttributeString(id_step, i, "field_trim_type"))); field.setRepeated(rep.getStepAttributeBoolean(id_step, i, "field_repeat")); field.setResultType(JsoupInputField .getResultTypeByCode(rep.getStepAttributeString(id_step, i, "field_result_type"))); inputFields[i] = field; } inFields = rep.getStepAttributeBoolean(id_step, "IsInFields"); IsAFile = rep.getStepAttributeBoolean(id_step, "IsAFile"); valueField = rep.getStepAttributeString(id_step, "valueField"); shortFileFieldName = rep.getStepAttributeString(id_step, "shortFileFieldName"); pathFieldName = rep.getStepAttributeString(id_step, "pathFieldName"); hiddenFieldName = rep.getStepAttributeString(id_step, "hiddenFieldName"); lastModificationTimeFieldName = rep.getStepAttributeString(id_step, "lastModificationTimeFieldName"); rootUriNameFieldName = rep.getStepAttributeString(id_step, "rootUriNameFieldName"); extensionFieldName = rep.getStepAttributeString(id_step, "extensionFieldName"); sizeFieldName = rep.getStepAttributeString(id_step, "sizeFieldName"); } catch (Exception e) { throw new KettleException( BaseMessages.getString(PKG, "JsoupInputMeta.Exception.ErrorReadingRepository"), e); } } public void saveRep(Repository rep, ObjectId id_transformation, ObjectId id_step) throws KettleException { try { rep.saveStepAttribute(id_transformation, id_step, "include", includeFilename); rep.saveStepAttribute(id_transformation, id_step, "include_field", filenameField); rep.saveStepAttribute(id_transformation, id_step, "addresultfile", addResultFile); rep.saveStepAttribute(id_transformation, id_step, "readurl", readurl); rep.saveStepAttribute(id_transformation, id_step, "IsIgnoreEmptyFile", IsIgnoreEmptyFile); rep.saveStepAttribute(id_transformation, id_step, "doNotFailIfNoFile", doNotFailIfNoFile); rep.saveStepAttribute(id_transformation, id_step, "rownum", includeRowNumber); rep.saveStepAttribute(id_transformation, id_step, "rownum_field", rowNumberField); rep.saveStepAttribute(id_transformation, id_step, "limit", rowLimit); for (int i = 0; i < fileName.length; i++) { rep.saveStepAttribute(id_transformation, id_step, i, "file_name", fileName[i]); rep.saveStepAttribute(id_transformation, id_step, i, "file_mask", fileMask[i]); rep.saveStepAttribute(id_transformation, id_step, i, "exclude_file_mask", excludeFileMask[i]); rep.saveStepAttribute(id_transformation, id_step, i, "file_required", fileRequired[i]); rep.saveStepAttribute(id_transformation, id_step, i, "include_subfolders", includeSubFolders[i]); } for (int i = 0; i < inputFields.length; i++) { JsoupInputField field = inputFields[i]; rep.saveStepAttribute(id_transformation, id_step, i, "field_name", field.getName()); rep.saveStepAttribute(id_transformation, id_step, i, "field_path", field.getPath()); rep.saveStepAttribute(id_transformation, id_step, i, "field_type", field.getTypeDesc()); rep.saveStepAttribute(id_transformation, id_step, i, "field_format", field.getFormat()); rep.saveStepAttribute(id_transformation, id_step, i, "field_currency", field.getCurrencySymbol()); rep.saveStepAttribute(id_transformation, id_step, i, "field_decimal", field.getDecimalSymbol()); rep.saveStepAttribute(id_transformation, id_step, i, "field_group", field.getGroupSymbol()); rep.saveStepAttribute(id_transformation, id_step, i, "field_length", field.getLength()); rep.saveStepAttribute(id_transformation, id_step, i, "field_precision", field.getPrecision()); rep.saveStepAttribute(id_transformation, id_step, i, "field_trim_type", field.getTrimTypeCode()); rep.saveStepAttribute(id_transformation, id_step, i, "field_result_type", field.getResultTypeCode()); rep.saveStepAttribute(id_transformation, id_step, i, "field_repeat", field.isRepeated()); } rep.saveStepAttribute(id_transformation, id_step, "IsInFields", inFields); rep.saveStepAttribute(id_transformation, id_step, "IsAFile", IsAFile); rep.saveStepAttribute(id_transformation, id_step, "valueField", valueField); rep.saveStepAttribute(id_transformation, id_step, "shortFileFieldName", shortFileFieldName); rep.saveStepAttribute(id_transformation, id_step, "pathFieldName", pathFieldName); rep.saveStepAttribute(id_transformation, id_step, "hiddenFieldName", hiddenFieldName); rep.saveStepAttribute(id_transformation, id_step, "lastModificationTimeFieldName", lastModificationTimeFieldName); rep.saveStepAttribute(id_transformation, id_step, "uriNameFieldName", uriNameFieldName); rep.saveStepAttribute(id_transformation, id_step, "rootUriNameFieldName", rootUriNameFieldName); rep.saveStepAttribute(id_transformation, id_step, "extensionFieldName", extensionFieldName); } catch (Exception e) { throw new KettleException( BaseMessages.getString(PKG, "JsoupInputMeta.Exception.ErrorSavingToRepository", "" + id_step), e); } } public FileInputList getFiles(VariableSpace space) { return FileInputList.createFileList(space, fileName, fileMask, excludeFileMask, fileRequired, includeSubFolderBoolean()); } private boolean[] includeSubFolderBoolean() { int len = fileName.length; boolean includeSubFolderBoolean[] = new boolean[len]; for (int i = 0; i < len; i++) { includeSubFolderBoolean[i] = YES.equalsIgnoreCase(includeSubFolders[i]); } return includeSubFolderBoolean; } public void check(List<CheckResultInterface> remarks, TransMeta transMeta, StepMeta stepMeta, RowMetaInterface prev, String input[], String output[], RowMetaInterface info) { CheckResult cr; if (!isInFields()) { // See if we get input... if (input.length <= 0) { cr = new CheckResult(CheckResult.TYPE_RESULT_ERROR, BaseMessages.getString(PKG, "JsoupInputMeta.CheckResult.NoInputExpected"), stepMeta); remarks.add(cr); } else { cr = new CheckResult(CheckResult.TYPE_RESULT_OK, BaseMessages.getString(PKG, "JsoupInputMeta.CheckResult.NoInput"), stepMeta); remarks.add(cr); } } if (getInputFields().length <= 0) { cr = new CheckResult(CheckResult.TYPE_RESULT_ERROR, BaseMessages.getString(PKG, "JsoupInputMeta.CheckResult.NoInputField"), stepMeta); remarks.add(cr); } if (isInFields()) { if (Const.isEmpty(getFieldValue())) { cr = new CheckResult(CheckResult.TYPE_RESULT_ERROR, BaseMessages.getString(PKG, "JsoupInputMeta.CheckResult.NoField"), stepMeta); remarks.add(cr); } else { cr = new CheckResult(CheckResult.TYPE_RESULT_OK, BaseMessages.getString(PKG, "JsoupInputMeta.CheckResult.FieldOk"), stepMeta); remarks.add(cr); } } else { FileInputList fileInputList = getFiles(transMeta); // String files[] = getFiles(); if (fileInputList == null || fileInputList.getFiles().size() == 0) { cr = new CheckResult(CheckResult.TYPE_RESULT_ERROR, BaseMessages.getString(PKG, "JsoupInputMeta.CheckResult.NoFiles"), stepMeta); remarks.add(cr); } else { cr = new CheckResult( CheckResult.TYPE_RESULT_OK, BaseMessages.getString(PKG, "JsoupInputMeta.CheckResult.FilesOk", "" + fileInputList.getFiles().size()), stepMeta); remarks.add(cr); } } } public StepInterface getStep(StepMeta stepMeta, StepDataInterface stepDataInterface, int cnr, TransMeta tr, Trans trans) { return new JsoupInput(stepMeta, stepDataInterface, cnr, tr, trans); } public StepDataInterface getStepData() { return new JsoupInputData(); } public boolean supportsErrorHandling() { return true; } /** * Since the exported transformation that runs this will reside in a ZIP file, we can't reference files relatively. * So what this does is turn the name of files into absolute paths OR it simply includes the resource in the ZIP file. * For now, we'll simply turn it into an absolute path and pray that the file is on a shared drive or something like that. * TODO: create options to configure this behavior */ public String exportResources(VariableSpace space, Map<String, ResourceDefinition> definitions, ResourceNamingInterface resourceNamingInterface, Repository repository) throws KettleException { try { // The object that we're modifying here is a copy of the original! // So let's change the filename from relative to absolute by grabbing the file object... // In case the name of the file comes from previous steps, forget about this! // List<String> newFilenames = new ArrayList<String>(); if (!isInFields()) { FileInputList fileList = getFiles(space); if (fileList.getFiles().size() > 0) { for (FileObject fileObject : fileList.getFiles()) { // From : ${Internal.Transformation.Filename.Directory}/../foo/bar.xml // To : /home/matt/test/files/foo/bar.xml // // If the file doesn't exist, forget about this effort too! // if (fileObject.exists()) { // Convert to an absolute path and add it to the list. // newFilenames.add(fileObject.getName().getPath()); } } // Still here: set a new list of absolute filenames! // fileName = newFilenames.toArray(new String[newFilenames.size()]); fileMask = new String[newFilenames.size()]; // all null since converted to absolute path. fileRequired = new String[newFilenames.size()]; // all null, turn to "Y" : for (int i = 0; i < newFilenames.size(); i++) fileRequired[i] = "Y"; } } return null; } catch (Exception e) { throw new KettleException(e); //$NON-NLS-1$ } } }