au.org.theark.phenotypic.util.PhenoDataSetFieldImportValidator.java Source code


Introduction

Here is the source code for au.org.theark.phenotypic.util.PhenoDataSetFieldImportValidator.java
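Before the listing, a minimal, hypothetical usage sketch of how a caller might drive this validator. The service references (iArkCommonService, iPhenotypicService) and the uploadVo are assumptions obtained elsewhere in The Ark (e.g. from the upload wizard page), not part of this file:

// Hypothetical caller: the injected services and uploadVo are assumed to be supplied by the page / context.
PhenoDataSetFieldImportValidator validator =
        new PhenoDataSetFieldImportValidator(iArkCommonService, iPhenotypicService, uploadVo);

// Structural check of the header row (IOException handling omitted for brevity)
Collection<String> formatErrors = validator.validateCustomDataMatrixFileFormat(
        uploadVo.getFileUpload().getInputStream(), "CSV", ',');

if (formatErrors != null && formatErrors.isEmpty()) {
    // Row-level validation of the field definitions (re-open the stream first)
    Collection<String> dataErrors = validator.validateDataDictionaryFileData(
            uploadVo.getFileUpload().getInputStream(), "CSV", ',');
}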

Source

/*******************************************************************************
 * Copyright (c) 2011  University of Western Australia. All rights reserved.
 * 
 * This file is part of The Ark.
 * 
 * The Ark is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 3
 * of the License, or (at your option) any later version.
 * 
 * The Ark is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 ******************************************************************************/
package au.org.theark.phenotypic.util;

import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Serializable;
import java.text.DateFormat;
import java.text.DecimalFormat;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Date;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.regex.Pattern;

import jxl.Workbook;
import jxl.read.biff.BiffException;

import org.apache.shiro.SecurityUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import au.org.theark.core.exception.ArkBaseException;
import au.org.theark.core.exception.CustomFieldSystemException;
import au.org.theark.core.exception.EntityNotFoundException;
import au.org.theark.core.exception.FileFormatException;
import au.org.theark.core.model.pheno.entity.PhenoDataSetField;
import au.org.theark.core.model.study.entity.ArkFunction;
import au.org.theark.core.model.study.entity.ArkModule;
import au.org.theark.core.model.study.entity.CustomField;
import au.org.theark.core.model.study.entity.FieldType;
import au.org.theark.core.model.study.entity.Study;
import au.org.theark.core.model.study.entity.UnitType;
import au.org.theark.core.service.IArkCommonService;
import au.org.theark.core.util.CustomFieldImportValidator;
import au.org.theark.core.util.CustomFieldValidationMessage;
import au.org.theark.core.util.DataConversionAndManipulationHelper;
import au.org.theark.core.util.XLStoCSV;
import au.org.theark.core.vo.UploadVO;
import au.org.theark.core.web.component.worksheet.ArkGridCell;
import au.org.theark.phenotypic.service.IPhenotypicService;
import au.org.theark.phenotypic.web.component.phenofielduploader.Constants;

import com.csvreader.CsvReader;

/**
 * PhenoDataSetFieldImportValidator provides support for validating an import file before attempting to use <br>
 * the corresponding importer to load it into the pheno data set field table for the specified function in question <br>
 * i.e. Function could be Subject, BioCollection or Biospecimen (or others if set up)
 * 
 * @author cellis
 * @author elam
 */
public class PhenoDataSetFieldImportValidator implements IPhenoImportValidator, Serializable {

    private static final long serialVersionUID = 1L;
    static Logger log = LoggerFactory.getLogger(PhenoDataSetFieldImportValidator.class);
    boolean qualityControl = false;
    private Study study;
    private String fieldName;
    private long subjectCount;
    private long fieldCount;
    private long curPos;
    private long srcLength = -1;
    //   private StopWatch                  timer                     = null;
    private char delimChr = au.org.theark.core.Constants.IMPORT_DELIM_CHAR_COMMA;
    private List<String> fileValidationMessages = new ArrayList<String>();
    private List<String> dataValidationMessages = new ArrayList<String>();
    private IArkCommonService<Void> iArkCommonService = null;

    private HashSet<Integer> insertRows = new HashSet<Integer>();
    private HashSet<Integer> updateRows = new HashSet<Integer>();
    private HashSet<ArkGridCell> insertCells = new HashSet<ArkGridCell>();
    private HashSet<ArkGridCell> updateCells = new HashSet<ArkGridCell>();
    private HashSet<ArkGridCell> warningCells = new HashSet<ArkGridCell>();
    private HashSet<ArkGridCell> errorCells = new HashSet<ArkGridCell>();
    private String fileFormat = au.org.theark.core.Constants.DEFAULT_FILE_FORMAT;
    private int row = 1;
    private ArkFunction arkFunction;
    private ArkModule arkModule;
    private static IPhenotypicService iPhenotypicService = null;

    /**
     * PhenoDataSetFieldImportValidator constructor
     * @param iArkCommonService
     * @param iPhenotypicService
     * @param uploadVo
     */
    public PhenoDataSetFieldImportValidator(IArkCommonService<Void> iArkCommonService,
            IPhenotypicService iPhenotypicService, UploadVO uploadVo) {
        this.iArkCommonService = iArkCommonService;
        this.iPhenotypicService = iPhenotypicService;
        this.arkFunction = uploadVo.getUpload().getArkFunction();

        // Set study in context
        Long studyId = (Long) SecurityUtils.getSubject().getSession()
                .getAttribute(au.org.theark.core.Constants.STUDY_CONTEXT_ID);
        if (studyId != null) {
            Study study = iArkCommonService.getStudy(studyId);
            this.study = study;
        }

        this.fileValidationMessages = new ArrayList<String>();
        this.dataValidationMessages = new ArrayList<String>();

        String filename = uploadVo.getFileUpload().getClientFileName();
        this.fileFormat = filename.substring(filename.lastIndexOf('.') + 1).toUpperCase();

        this.delimChr = uploadVo.getUpload().getDelimiterType().getDelimiterCharacter();

    }

    public boolean isQualityControl() {
        return qualityControl;
    }

    public void setQualityControl(boolean qualityControl) {
        this.qualityControl = qualityControl;
    }

    /**
     * Validates the data dictionary file's general structure/format:<BR>
     * "FIELD_NAME","FIELD_TYPE","DESCRIPTION","QUESTION","UNITS","ENCODED_VALUES","MINIMUM_VALUE","MAXIMUM_VALUE","MISSING_VALUE","REQUIRED","ALLOW_MULTIPLE_SELECTION"
     * 
     * @param fileInputStream
     *           is the input stream of a file
     * @param inLength
     *           is the length of the file
     * @param isForPheno
     *           selects which set of required headers the file is validated against
     * @throws FileFormatException
     *            file format Exception
     * @throws CustomFieldSystemException
     *            custom field system Exception
     */
    @Override
    public java.util.Collection<String> validateMatrixFileFormat(InputStream fileInputStream, long inLength,
            boolean isForPheno) throws FileFormatException, CustomFieldSystemException {
        curPos = 0;
        row = 0;

        InputStreamReader inputStreamReader = null;
        CsvReader csvReader = null;

        try {
            inputStreamReader = new InputStreamReader(fileInputStream);
            csvReader = new CsvReader(inputStreamReader, delimChr);
            String[] stringLineArray;

            srcLength = inLength;
            if (srcLength <= 0) {
                throw new FileFormatException(
                        "The input size was not greater than 0.  Actual length reported: " + srcLength);
            }

            //csvReader.readHeaders();
            if (!csvReader.readHeaders()) {
                fileValidationMessages.add("Header reading was unsuccessful. Please check the file format again.");
            }

            // Set field list (note: 2nd column to Nth column)
            // FIELD_NAME FIELD_TYPE DESCRIPTION QUESTION UNITS ENCODED_VALUES MINIMUM_VALUE MAXIMUM_VALUE MISSING_VALUE REQUIRED
            //     0          1           2         3       4         5             6             7            8          9
            String[] fileHeaderColumnArray = csvReader.getHeaders();
            boolean headerError = false;

            // Uploading a Custom Field (Data Dictionary) file
            Collection<String> fileHeaderCollection = new ArrayList<String>();

            String[] requiredHeaderArray = null;
            if (isForPheno) {
                requiredHeaderArray = au.org.theark.core.Constants.DATA_DICTIONARY_HEADER;
            } else {
                //remove the first element from array.
                requiredHeaderArray = Arrays.copyOfRange(Constants.PHENO_DATASET_FIELD_UPLOAD_HEADER[0], 1,
                        Constants.PHENO_DATASET_FIELD_UPLOAD_HEADER[0].length);
            }

            log.info("requiredArray = " + Arrays.toString(requiredHeaderArray));
            log.info("requiredlength = " + requiredHeaderArray.length);
            log.info("fileHeaderColumnArray = " + Arrays.toString(fileHeaderColumnArray));
            log.info("fileHeaderColumnlength = " + fileHeaderColumnArray.length);

            String specificError = "";

            //all columns mandatory, even if data empty
            if (fileHeaderColumnArray.length < requiredHeaderArray.length) {
                specificError = "File did not contain all " + requiredHeaderArray.length + " expected headers.\n";
                log.info("error because less headers than required");
                headerError = true;
            }
            // Populate the collection for a search
            for (int i = 0; i < fileHeaderColumnArray.length; i++) {
                fileHeaderCollection.add(fileHeaderColumnArray[i]);
            }

            log.info("fileHeaderlength now = " + fileHeaderCollection.size());

            // Search the dataDictionaryHeader for missing headers
            for (int i = 0; i < requiredHeaderArray.length; i++) {
                //String ithHeader = (String)requiredHeaderArray[i];
                //ithHeader.compareToIgnoreCase(str)
                if (!fileHeaderCollection.contains(requiredHeaderArray[i])) {
                    log.info("error because didn't contact the following required header" + requiredHeaderArray[i]);
                    specificError = "File was missing the following required header: " + requiredHeaderArray[i]
                            + ".\n";
                    headerError = true;
                    break;
                }
            }

            if (headerError) {
                // Invalid file format
                StringBuffer stringBuffer = new StringBuffer();
                String delimiterTypeName = iArkCommonService.getDelimiterTypeNameByDelimiterChar(delimChr);

                stringBuffer.append(
                        "The specified file does not appear to conform to the expected data dictionary file format.\n");
                stringBuffer.append(specificError);
                stringBuffer.append("The specified file format was: " + fileFormat + "\n");
                stringBuffer
                        .append("The specified delimiter was: [" + delimChr + "] (" + delimiterTypeName + ")\n");
                stringBuffer.append("The default data dictionary format is as follows:\n");
                for (int i = 0; i < requiredHeaderArray.length; i++) {
                    if (i > 0) {
                        stringBuffer.append(delimChr);
                    }
                    stringBuffer.append(requiredHeaderArray[i]);
                }
                stringBuffer.append("\n");
                for (int i = 0; i < requiredHeaderArray.length; i++) {
                    if (i > 0) {
                        stringBuffer.append(delimChr);
                    }
                    stringBuffer.append("[...]");
                }
                stringBuffer.append("\n");

                fileValidationMessages.add(stringBuffer.toString());
            } else {
                List<String> requiredHeaders = new ArrayList<String>();
                for (int i = 0; i < requiredHeaderArray.length; i++) {
                    requiredHeaders.add(requiredHeaderArray[i]);
                }
                for (int i = 0; i < fileHeaderColumnArray.length; i++) {
                    if (!requiredHeaders.contains(fileHeaderColumnArray[i])) {
                        fileValidationMessages.add("Error: the column name " + fileHeaderColumnArray[i]
                                + " is not a valid column name.");
                    }
                }

                srcLength = inLength - csvReader.getHeaders().toString().length();
                log.debug("Header length: " + csvReader.getHeaders().toString().length());

                row = 1;

                // Loop through all rows in file
                while (csvReader.readRecord()) {
                    // Read the current record into a String array
                    stringLineArray = csvReader.getValues();

                    // Loop through all columns in the current row to update the progress counter
                    for (int i = 0; i < stringLineArray.length; i++) {
                        // Update progress
                        curPos += stringLineArray[i].length() + 1; // update progress
                    }

                    subjectCount++;
                    row++;
                }
            }

            if (fileValidationMessages.size() > 0) {
                for (Iterator<String> iterator = fileValidationMessages.iterator(); iterator.hasNext();) {
                    String errorMessage = iterator.next();
                    log.debug(errorMessage);
                }
            } else {
                log.debug("Validation is ok");
            }
        } catch (IOException ioe) {
            log.error("processMatrixPhenoFile IOException stacktrace:", ioe);
            throw new CustomFieldSystemException(
                    "An unexpected I/O exception occurred whilst reading the phenotypic data file.");
        } catch (Exception ex) {
            log.error("processMatrixPhenoFile Exception stacktrace:", ex);
            throw new CustomFieldSystemException(
                    "An unexpected exception occurred when trying to process phenotypic data file.");
        } finally {
            // Clean up the IO objects
            //timer.stop();
            // fileValidationMessages.add("Total elapsed time: " + timer.getTime() + " ms or " + decimalFormat.format(timer.getTime() / 1000.0) + " s");
            // fileValidationMessages.add("Total file size: " + srcLength + " B or " + decimalFormat.format(srcLength / 1024.0 / 1024.0) + " MB");

            //if (timer != null)
            //   timer = null;
            if (csvReader != null) {
                try {
                    csvReader.close();
                } catch (Exception ex) {
                    log.error("Cleanup operation failed: csvRdr.close()", ex);
                }
            }
            if (inputStreamReader != null) {
                try {
                    inputStreamReader.close();
                } catch (Exception ex) {
                    log.error("Cleanup operation failed: isr.close()", ex);
                }
            }
            // Restore the state of variables
            srcLength = -1;
        }

        return fileValidationMessages;
    }

    /**
     * Validates the values supplied in the data dictionary file to ensure they meet the requirements for import into the database
     * 
     * @param fileInputStream
     *           is the input stream of a file
     * @param inLength
     *           is the length of the file
     * @throws FileFormatException
     *            file format Exception
     * @throws CustomFieldSystemException
     *            CustomField System Exception
     */
    private java.util.Collection<String> validateDataDictionaryFileData(InputStream fileInputStream, long inLength)
            throws FileFormatException, CustomFieldSystemException {
        curPos = 0;
        int rowIdx = 1;

        InputStreamReader inputStreamReader = null;
        CsvReader csvReader = null;
        DecimalFormat decimalFormat = new DecimalFormat("0.00");

        /*
         * Field table requires: ID, STUDY_ID, FIELD_TYPE_ID, NAME, DESCRIPTION, UNITS, MIN_VALUE, MAX_VALUE, ENCODED_VALUES, MISSING_VALUE
         */
        PhenoDataSetField field = new PhenoDataSetField();
        field.setStudy(study);

        //These fields must be available to the pheno collection, therefore they are saved / updated / fetched by that ark function; ideally this should be keyed by ark module
        /*if(arkFunction.getName().equals(au.org.theark.core.Constants.FUNCTION_KEY_VALUE_DATA_DICTIONARY) || arkFunction.getName().equals(au.org.theark.core.Constants.FUNCTION_KEY_VALUE_DATA_DICTIONARY_UPLOAD)){
           arkFunction = iArkCommonService.getArkFunctionByName(au.org.theark.core.Constants.FUNCTION_KEY_VALUE_PHENO_COLLECTION);
        }*/
        arkFunction = iArkCommonService
                .getArkFunctionByName(au.org.theark.core.Constants.FUNCTION_KEY_VALUE_DATA_DICTIONARY);

        try {
            inputStreamReader = new InputStreamReader(fileInputStream);
            csvReader = new CsvReader(inputStreamReader, delimChr);
            String[] stringLineArray;
            srcLength = inLength;
            if (srcLength <= 0) {
                throw new FileFormatException(
                        "The input size was not greater than 0.  Actual length reported: " + srcLength);
            }
            //         timer = new StopWatch();
            //         timer.start();
            csvReader.readHeaders();
            srcLength = inLength - csvReader.getHeaders().toString().length();
            log.debug("Header length: " + csvReader.getHeaders().toString().length());
            // Loop through all rows in file
            while (csvReader.readRecord()) {
                // Read the current record into a String array
                stringLineArray = csvReader.getValues();

                ArkGridCell gridCell = null;

                // First column should be Field Name
                fieldName = csvReader.get("FIELD_NAME");

                // Only check rows with a valid fieldName
                if (!fieldName.isEmpty()) {
                    int cols = stringLineArray.length;
                    field = new PhenoDataSetField();
                    field.setStudy(study);
                    field.setName(fieldName);
                    field.setDescription(csvReader.get("DESCRIPTION"));
                    field.setFieldLabel(csvReader.get("QUESTION"));
                    field.setUnitTypeInText(csvReader.get("UNITS"));
                    //}
                    FieldType studyFieldType = new FieldType();
                    try {
                        studyFieldType = iArkCommonService.getFieldTypeByName(csvReader.get("FIELD_TYPE"));
                        field.setFieldType(studyFieldType);
                    } catch (EntityNotFoundException e) {
                        // Field Type not found, handled in error messaging below....
                    }

                    String encodedValues = csvReader.get("ENCODED_VALUES");

                    field.setEncodedValues(encodedValues);
                    /* removed the logic below while this is moved to customfieldgroup
                    if(encodedValues!=null && !encodedValues.isEmpty()){
                       field.setEncodedValues(encodedValues);
                       if(csvReader.get("ALLOW_MULTIPLE_SELECTIONS")!=null && !csvReader.get("ALLOW_MULTIPLE_SELECTIONS").isEmpty()){
                          field.
                       }
                    }*/
                    field.setMinValue(csvReader.get("MINIMUM_VALUE"));
                    field.setMaxValue(csvReader.get("MAXIMUM_VALUE"));
                    field.setMissingValue(csvReader.get("MISSING_VALUE"));
                    field.setDefaultValue(csvReader.get("DEFAULT_VALUE"));

                    //This is how the existing field is looked up by name.
                    //The name is a unique field according to the db index defined.
                    PhenoDataSetField oldField = iPhenotypicService.getPhenoDataSetFieldByNameStudyArkFunction(
                            csvReader.get("FIELD_NAME"), study, arkFunction);
                    //CustomField oldField = iArkCommonService.getCustomFieldByNameStudyArkFunction(csvReader.get("FIELD_NAME"), study, arkFunction);
                    if (oldField == null) {
                        // This is a new record - not able to find an existing field by that name
                        insertRows.add(rowIdx);
                    } else {
                        // Determine updates
                        if (oldField.getId() != null) {
                            if (oldField.getPhenoFieldHasData()) {
                                // Block updates to field that already have data
                                for (int colIdx = 0; colIdx < cols; colIdx++) {
                                    errorCells.add(new ArkGridCell(colIdx, rowIdx));
                                }

                                StringBuffer stringBuffer = new StringBuffer();
                                stringBuffer.append("Error: ");
                                stringBuffer.append("The existing field ");
                                stringBuffer.append(fieldName);
                                stringBuffer.append(
                                        " already has data associated with it and thus no changes can be made to this field.");
                                dataValidationMessages.add(stringBuffer.toString());
                            } else {
                                updateRows.add(rowIdx);
                                for (int colIdx = 0; colIdx < cols; colIdx++) {
                                    updateCells.add(new ArkGridCell(colIdx, rowIdx));
                                }
                            }
                        }
                    }

                    if (csvReader.get("FIELD_TYPE") != null) {
                        gridCell = new ArkGridCell(csvReader.getIndex("FIELD_TYPE"), rowIdx);
                        if (!PhenoDataSetFieldImportValidator.validateFieldType(this.fieldName,
                                csvReader.get("FIELD_TYPE"), dataValidationMessages)) {
                            errorCells.add(gridCell);
                        }
                    }

                    boolean validForMultiSelect = false;
                    String allowMultiple = (csvReader.get("ALLOW_MULTIPLE_SELECTIONS"));
                    if (field.getEncodedValues() != null && !field.getEncodedValues().isEmpty()) {
                        gridCell = new ArkGridCell(csvReader.getIndex("ENCODED_VALUES"), rowIdx);
                        // Validate encoded values not a date type
                        if (!PhenoDataSetFieldImportValidator.validateEncodedValues(field,
                                dataValidationMessages)) {
                            errorCells.add(gridCell);
                        } else {
                            validForMultiSelect = true;
                            if (!DataConversionAndManipulationHelper.isSomethingLikeABoolean(allowMultiple)
                                    && !allowMultiple.isEmpty()) {
                                gridCell = new ArkGridCell(csvReader.getIndex("ALLOW_MULTIPLE_SELECTIONS"), rowIdx);
                                dataValidationMessages.add(PhenoDataSetFieldValidationMessage
                                        .invalidOption(field.getName(), "ALLOW_MULTIPLE_SELECTIONS"));
                                errorCells.add(gridCell);
                            } else if (!validForMultiSelect) {
                                gridCell = new ArkGridCell(csvReader.getIndex("ALLOW_MULTIPLE_SELECTIONS"), rowIdx);
                                dataValidationMessages.add(PhenoDataSetFieldValidationMessage
                                        .nonConformingAllowMultipleSelect(field.getName()));
                                errorCells.add(gridCell);
                            }
                        }

                    } else if (!allowMultiple.isEmpty()) {
                        gridCell = new ArkGridCell(csvReader.getIndex("ALLOW_MULTIPLE_SELECTIONS"), rowIdx);
                        dataValidationMessages.add(PhenoDataSetFieldValidationMessage
                                .nonConformingAllowMultipleSelect(field.getName()));
                        errorCells.add(gridCell);
                    }

                    //Handling the MultiLine display with encoded values
                    String allowMultiLine = (csvReader.get("MULTI_LINE_DISPLAY"));
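                    // MULTI_LINE_DISPLAY is only accepted (as a boolean flag) when the field has no encoded values
                    // and is of CHARACTER type; the checks below flag any other combination.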
                    if (field.getEncodedValues() == null || field.getEncodedValues().trim().isEmpty()) {
                        if (!DataConversionAndManipulationHelper.isSomethingLikeABoolean(allowMultiLine)
                                && !allowMultiLine.isEmpty()) {
                            gridCell = new ArkGridCell(csvReader.getIndex("MULTI_LINE_DISPLAY"), rowIdx);
                            dataValidationMessages.add(PhenoDataSetFieldValidationMessage
                                    .invalidOption(field.getName(), "MULTI_LINE_DISPLAY"));
                            errorCells.add(gridCell);
                        }

                    } else {
                        if (!allowMultiLine.isEmpty()) {
                            gridCell = new ArkGridCell(csvReader.getIndex("MULTI_LINE_DISPLAY"), rowIdx);
                            dataValidationMessages.add(PhenoDataSetFieldValidationMessage
                                    .nonMultiLineAllowWithEncodedValues(field.getName(), "MULTI_LINE_DISPLAY"));
                            errorCells.add(gridCell);
                        }
                    }
                    //Handling the MultiLine value for the other character types.
                    if (!allowMultiLine.isEmpty()
                            && !field.getFieldType().getName().equalsIgnoreCase(Constants.FIELD_TYPE_CHARACTER)) {
                        gridCell = new ArkGridCell(csvReader.getIndex("MULTI_LINE_DISPLAY"), rowIdx);
                        dataValidationMessages.add(PhenoDataSetFieldValidationMessage
                                .nonChatacterFieldTypeMultiLineNotAccepted(field.getName(), "MULTI_LINE_DISPLAY"));
                        errorCells.add(gridCell);
                    }

                    if (field.getMinValue() != null && !field.getMinValue().isEmpty()) {
                        gridCell = new ArkGridCell(csvReader.getIndex("MINIMUM_VALUE"), rowIdx);
                        // Validate the field definition
                        if (!PhenoDataSetFieldImportValidator.validateFieldMinDefinition(field,
                                dataValidationMessages)) {
                            errorCells.add(gridCell);
                        }
                    }

                    if (field.getMaxValue() != null && !field.getMaxValue().isEmpty()) {
                        gridCell = new ArkGridCell(csvReader.getIndex("MAXIMUM_VALUE"), rowIdx);
                        // Validate the field definition
                        if (!PhenoDataSetFieldImportValidator.validateFieldMaxDefinition(field,
                                dataValidationMessages)) {
                            errorCells.add(gridCell);
                        }
                    }

                    if (field.getMissingValue() != null && !field.getMissingValue().isEmpty()) {
                        gridCell = new ArkGridCell(csvReader.getIndex("MISSING_VALUE"), rowIdx);
                        // Validate the field definition
                        if (!PhenoDataSetFieldImportValidator.validateFieldMissingDefinition(field,
                                dataValidationMessages)) {
                            errorCells.add(gridCell);
                        }
                    }
                    if (field.getDefaultValue() != null && !field.getDefaultValue().isEmpty()) {
                        gridCell = new ArkGridCell(csvReader.getIndex("DEFAULT_VALUE"), rowIdx);
                        // Validate the field definition
                        if (!PhenoDataSetFieldImportValidator.validateFieldDefaultDefinition(field,
                                dataValidationMessages)) {
                            errorCells.add(gridCell);
                        }
                    }

                    // Required column only relevant to specific custom field data (eg subject custom field)
                    if (csvReader.getIndex("REQUIRED") > 0 && !(csvReader.get("REQUIRED")).isEmpty()) {
                        if (!DataConversionAndManipulationHelper
                                .isSomethingLikeABoolean(csvReader.get("REQUIRED"))) {
                            gridCell = new ArkGridCell(csvReader.getIndex("REQUIRED"), rowIdx);
                            dataValidationMessages.add(
                                    PhenoDataSetFieldValidationMessage.invalidOption(field.getName(), "REQUIRED"));
                            errorCells.add(gridCell);
                        }
                    }

                    fieldCount++;
                    rowIdx++;
                }
            }

            if (dataValidationMessages.size() > 0) {
                log.debug("Validation messages: " + dataValidationMessages.size());
                for (Iterator<String> iterator = dataValidationMessages.iterator(); iterator.hasNext();) {
                    String errorMessage = iterator.next();
                    log.debug(errorMessage);
                }
            } else {
                log.debug("Validation is ok");
            }
        } catch (IOException ioe) {
            log.error("processMatrixFile IOException stacktrace:", ioe);
            throw new CustomFieldSystemException(
                    "An unexpected I/O exception occurred whilst reading the data file.");
        } catch (Exception ex) {
            log.error("processMatrixFile Exception stacktrace:", ex);
            throw new CustomFieldSystemException(
                    "An unexpected exception occurred when trying to process data file.");
        } finally {
            // Clean up the IO objects
            //         timer.stop();
            //         log.debug("Total elapsed time: " + timer.getTime() + " ms or " + decimalFormat.format(timer.getTime() / 1000.0) + " s");
            log.debug("Total file size: " + srcLength + " B or " + decimalFormat.format(srcLength / 1024.0 / 1024.0)
                    + " MB");
            //         if (timer != null)
            //            timer = null;
            if (csvReader != null) {
                try {
                    csvReader.close();
                } catch (Exception ex) {
                    log.error("Cleanup operation failed: csvRdr.close()", ex);
                }
            }
            if (inputStreamReader != null) {
                try {
                    inputStreamReader.close();
                } catch (Exception ex) {
                    log.error("Cleanup operation failed: isr.close()", ex);
                }
            }
            // Restore the state of variables
            srcLength = -1;
        }

        if (errorCells.isEmpty()) {
            for (Iterator<Integer> iterator = updateRows.iterator(); iterator.hasNext();) {
                Integer i = (Integer) iterator.next();
                dataValidationMessages.add("Data on row " + i.intValue() + " exists, please confirm update");
            }
        }

        return dataValidationMessages;
    }

    /**
     * Validates the ENCODED_VALUES column in a matrix Data Dictionary file
     * 
     * @param field
     *           is the field entity in question
     * @param errorMessages
     *           is the collection of error messages to add to if any errors are found
     * @return true if field.fieldType is CHARACTER and the encoded values match the expected pattern
     */
    private static boolean validateEncodedValues(PhenoDataSetField field, Collection<String> errorMessages) {
        boolean isValid = false;
        if (!field.getFieldType().getName().equalsIgnoreCase(au.org.theark.core.Constants.FIELD_TYPE_CHARACTER)) {
            // At the moment, only allowed to have encodedValues for a field where fieldType == CHARACTER
            errorMessages.add(PhenoDataSetFieldValidationMessage
                    .fieldTypeIsNotCharacterWithEncodedValue(field.getName(), field.getFieldType().getName()));
        } else if (!Pattern.matches(au.org.theark.core.Constants.ENCODED_VALUES_PATTERN,
                field.getEncodedValues())) {
            errorMessages.add(PhenoDataSetFieldValidationMessage.nonConformingEncodedValue(field.getName()));
        } else {
            isValid = true;
        }

        return isValid;
    }

    /**
     * Validates that the general file format conforms to a data dictionary upload
     * 
     * @param inputStream
     *           is the input stream of the file
     * @param fileFormat
     *           is the file format (eg txt)
     * @param delimChar
     *           is the delimiter character of the file (eg comma)
     * @return a collection of validation messages
     */
    @Override
    public Collection<String> validateCustomDataMatrixFileFormat(InputStream inputStream, String fileFormat,
            char delimChar) {
        java.util.Collection<String> validationMessages = null;

        try {
            // If Excel, convert to CSV for validation
            if (fileFormat.equalsIgnoreCase("XLS")) {
                Workbook w;
                try {
                    w = Workbook.getWorkbook(inputStream);
                    delimChr = ',';
                    XLStoCSV xlsToCsv = new XLStoCSV(delimChr);
                    inputStream = xlsToCsv.convertXlsToCsv(w);
                    inputStream.reset();
                } catch (BiffException e) {
                    log.error(e.getMessage());
                } catch (IOException e) {
                    log.error(e.getMessage());
                }
            }
            validationMessages = validateMatrixFileFormat(inputStream, inputStream.toString().length(), false);
        } catch (FileFormatException ffe) {
            log.error("FILE_FORMAT_EXCPEPTION: " + ffe);
        } catch (ArkBaseException abe) {
            log.error("ARK_BASE_EXCEPTION: " + abe);
        }
        return validationMessages;
    }

    /**
     * Validates that the general file format conforms to a data dictionary upload
     * 
     * @param inputStream
     *           is the input stream of the file
     * @param fileFormat
     *           is the file format (eg txt)
     * @param delimChar
     *           is the delimiter character of the file (eg comma)
     * @return a collection of validation messages
     */
    public Collection<String> validateMatrixPhenoFileFormat(InputStream inputStream, String fileFormat,
            char delimChar) {
        java.util.Collection<String> validationMessages = null;

        try {
            // If Excel, convert to CSV for validation
            if (fileFormat.equalsIgnoreCase("XLS")) {
                Workbook w;
                try {
                    w = Workbook.getWorkbook(inputStream);
                    delimChr = ',';
                    XLStoCSV xlsToCsv = new XLStoCSV(delimChr);
                    inputStream = xlsToCsv.convertXlsToCsv(w);
                    inputStream.reset();
                } catch (BiffException e) {
                    log.error(e.getMessage());
                } catch (IOException e) {
                    log.error(e.getMessage());
                }
            }
            validationMessages = validateMatrixFileFormat(inputStream, inputStream.toString().length(), true);
        } catch (FileFormatException ffe) {
            log.error("FILE_FORMAT_EXCPEPTION: " + ffe);
        } catch (ArkBaseException abe) {
            log.error("ARK_BASE_EXCEPTION: " + abe);
        }
        return validationMessages;
    }

    /**
     * Validates the file against the assumed default "matrix" data dictionary file format:
     * "FIELD_NAME","FIELD_TYPE","DESCRIPTION","UNITS","ENCODED_VALUES","MINIMUM_VALUE","MAXIMUM_VALUE","MISSING_VALUE"
     * 
     * @param inputStream
     *           is the input stream of the file
     * @param fileFormat
     *           is the file format (eg CSV, TXT or XLS)
     * @param delimChar
     *           is the delimiter character of the file (eg COMMA, TAB, PIPE etc)
     * @return a collection of validation messages
     */
    @Override
    public Collection<String> validateDataDictionaryFileData(InputStream inputStream, String fileFormat,
            char delimChar) {
        java.util.Collection<String> validationMessages = null;

        try {
            // If Excel, convert to CSV for validation
            if (fileFormat.equalsIgnoreCase("XLS")) {
                Workbook w;
                try {
                    w = Workbook.getWorkbook(inputStream);
                    delimChr = ',';
                    XLStoCSV xlsToCsv = new XLStoCSV(delimChr);
                    inputStream = xlsToCsv.convertXlsToCsv(w);
                    inputStream.reset();
                } catch (BiffException e) {
                    log.error(e.getMessage());
                } catch (IOException e) {
                    log.error(e.getMessage());
                }
            }
            validationMessages = validateDataDictionaryFileData(inputStream, inputStream.toString().length());
        } catch (FileFormatException ffe) {
            log.error("FILE_FORMAT_EXCPEPTION: " + ffe);
        } catch (ArkBaseException abe) {
            log.error("ARK_BASE_EXCEPTION: " + abe);
        }
        return validationMessages;
    }

    private static boolean validateFieldMissingDefinition(PhenoDataSetField field,
            Collection<String> errorMessages) {
        boolean isValid = true;

        if (!(field.getFieldType().getName().equalsIgnoreCase(au.org.theark.core.Constants.FIELD_TYPE_CHARACTER)
                || field.getFieldType().getName().equalsIgnoreCase(au.org.theark.core.Constants.FIELD_TYPE_NUMBER)
                || field.getFieldType().getName().equalsIgnoreCase(au.org.theark.core.Constants.FIELD_TYPE_DATE))) {
            errorMessages.add(PhenoDataSetFieldValidationMessage.fieldMissingValueNotDefinedType(field));
            isValid = false;
        }

        // Number field type
        if (field.getFieldType().getName().equalsIgnoreCase(au.org.theark.core.Constants.FIELD_TYPE_NUMBER)) {
            try {
                Float.parseFloat(field.getMissingValue());
                isValid = true;
            } catch (NumberFormatException nfe) {
                errorMessages.add(PhenoDataSetFieldValidationMessage.fieldMissingValueNotDefinedType(field));
                isValid = false;
            } catch (NullPointerException npe) {
                isValid = false;
            }
        }

        // Date field type
        if (field.getFieldType().getName().equalsIgnoreCase(au.org.theark.core.Constants.FIELD_TYPE_DATE)) {
            try {
                DateFormat dateFormat = new SimpleDateFormat(au.org.theark.core.Constants.DD_MM_YYYY);
                dateFormat.setLenient(false);
                dateFormat.parse(field.getMissingValue());
                isValid = true;
            } catch (ParseException pe) {
                errorMessages
                        .add(PhenoDataSetFieldValidationMessage.fieldDefinitionMissingValueNotValidDate(field));
                isValid = false;
            } catch (NullPointerException npe) {
                isValid = false;
            }
        }
        return isValid;
    }

    private static boolean validateFieldDefaultDefinition(PhenoDataSetField field,
            Collection<String> errorMessages) {
        boolean isValid = true;
        if (!(field.getFieldType().getName().equalsIgnoreCase(au.org.theark.core.Constants.FIELD_TYPE_CHARACTER)
                || field.getFieldType().getName().equalsIgnoreCase(au.org.theark.core.Constants.FIELD_TYPE_NUMBER)
                || field.getFieldType().getName().equalsIgnoreCase(au.org.theark.core.Constants.FIELD_TYPE_DATE))) {
            errorMessages.add(PhenoDataSetFieldValidationMessage.fieldDefaultValueNotDefinedType(field));
            isValid = false;
        }
        //Character field type
        //if Encoded value has been introduced check the default value has one of the encoded value occupied.
        //ARK-1357 -This Default value validation has to be removed-[2017-05-11-Sanjay].
        /*if (field.getFieldType().getName().equalsIgnoreCase(au.org.theark.core.Constants.FIELD_TYPE_CHARACTER)) {
           if(iPhenotypicService.isEncodedValue(field, field.getDefaultValue())){
        isValid=true;
           }else{
        errorMessages.add(PhenoDataSetFieldValidationMessage.fieldDefaultValueNotINEncodedLst(field));
        isValid=false;
           }
        }*/
        // Number field type
        if (field.getFieldType().getName().equalsIgnoreCase(au.org.theark.core.Constants.FIELD_TYPE_NUMBER)) {
            try {
                Float defaultVal = null;
                Float minVal = null;
                Float maxVal = null;

                if (field.getDefaultValue() != null && !field.getDefaultValue().isEmpty()) {
                    defaultVal = Float.parseFloat(field.getDefaultValue());
                }
                if (field.getMinValue() != null && !field.getMinValue().isEmpty()) {
                    minVal = Float.parseFloat(field.getMinValue());
                }
                if (field.getMaxValue() != null && !field.getMaxValue().isEmpty()) {
                    maxVal = Float.parseFloat(field.getMaxValue());
                }
                //check default value in between min and max
                if (defaultVal != null && minVal != null && maxVal != null) {
                    if ((minVal.equals(defaultVal) && maxVal > defaultVal)
                            || (maxVal.equals(defaultVal) && minVal < defaultVal)
                            || (minVal < defaultVal && maxVal > defaultVal)) {
                        isValid = true;
                    } else {
                        errorMessages.add(
                                PhenoDataSetFieldValidationMessage.fieldDefaultValueInsideMinAndMaxRange(field));
                        isValid = false;
                    }
                }
            } catch (NumberFormatException nfe) {
                errorMessages.add(PhenoDataSetFieldValidationMessage.fieldDefaultValueNotDefinedType(field));
                isValid = false;
            } catch (NullPointerException npe) {
                errorMessages
                        .add(PhenoDataSetFieldValidationMessage.fieldDefinitionDefaultValueValidatingNull(field));
                isValid = false;
            }
        }

        // Date field type
        if (field.getFieldType().getName().equalsIgnoreCase(au.org.theark.core.Constants.FIELD_TYPE_DATE)) {
            try {
                DateFormat dateFormat = new SimpleDateFormat(au.org.theark.core.Constants.DD_MM_YYYY);
                dateFormat.setLenient(false);
                Date defaultDate = null;
                Date minDate = null;
                Date maxDate = null;
                if (field.getDefaultValue() != null && !field.getDefaultValue().isEmpty()) {
                    defaultDate = dateFormat.parse(field.getDefaultValue());
                }
                if (field.getMinValue() != null && !field.getMinValue().isEmpty()) {
                    minDate = dateFormat.parse(field.getMinValue());
                }
                if (field.getMaxValue() != null && !field.getMaxValue().isEmpty()) {
                    maxDate = dateFormat.parse(field.getMaxValue());
                }
                //check default value in between min and max
                if (defaultDate != null && minDate != null && maxDate != null) {
                    if ((minDate.equals(defaultDate) && maxDate.after(defaultDate))
                            || (maxDate.equals(defaultDate) && minDate.before(defaultDate))
                            || (minDate.before(defaultDate) && maxDate.after(defaultDate))) {
                        isValid = true;
                    } else {
                        errorMessages.add(PhenoDataSetFieldValidationMessage
                                .fieldDefaultDateInsideMinAndMaxRange(field, defaultDate, minDate, maxDate));
                        isValid = false;
                    }
                }
            } catch (ParseException pe) {
                errorMessages
                        .add(PhenoDataSetFieldValidationMessage.fieldDefinitionDefaultValueNotValidDate(field));
                isValid = false;
            } catch (NullPointerException npe) {
                errorMessages
                        .add(PhenoDataSetFieldValidationMessage.fieldDefinitionDefaultValueValidatingNull(field));
                isValid = false;
            }
        }

        return isValid;
    }

    private static boolean validateFieldMaxDefinition(PhenoDataSetField field, Collection<String> errorMessages) {
        boolean isValid = false;

        if (!(field.getFieldType().getName().equalsIgnoreCase(au.org.theark.core.Constants.FIELD_TYPE_CHARACTER)
                || field.getFieldType().getName().equalsIgnoreCase(au.org.theark.core.Constants.FIELD_TYPE_NUMBER)
                || field.getFieldType().getName().equalsIgnoreCase(au.org.theark.core.Constants.FIELD_TYPE_DATE))) {
            errorMessages.add(PhenoDataSetFieldValidationMessage.fieldMaxValueNotDefinedType(field));
            return isValid;
        }

        // Number field type
        if (field.getFieldType().getName().equalsIgnoreCase(au.org.theark.core.Constants.FIELD_TYPE_NUMBER)) {
            try {
                Float.parseFloat(field.getMaxValue());
                isValid = true;
            } catch (NumberFormatException nfe) {
                errorMessages.add(PhenoDataSetFieldValidationMessage.fieldMaxValueNotDefinedType(field));
                isValid = false;
            } catch (NullPointerException npe) {
                isValid = false;
            }
        }

        // Date field type
        if (field.getFieldType().getName().equalsIgnoreCase(au.org.theark.core.Constants.FIELD_TYPE_DATE)) {
            try {
                DateFormat dateFormat = new SimpleDateFormat(au.org.theark.core.Constants.DD_MM_YYYY);
                dateFormat.setLenient(false);
                dateFormat.parse(field.getMaxValue());
                isValid = true;
            } catch (ParseException pe) {
                errorMessages.add(PhenoDataSetFieldValidationMessage.fieldDefinitionMaxValueNotValidDate(field));
                isValid = false;
            } catch (NullPointerException npe) {
                isValid = false;
            }
        }
        return isValid;
    }

    private static boolean validateFieldMinDefinition(PhenoDataSetField field, Collection<String> errorMessages) {
        boolean isValid = false;

        if (!(field.getFieldType().getName().equalsIgnoreCase(au.org.theark.core.Constants.FIELD_TYPE_CHARACTER)
                || field.getFieldType().getName().equalsIgnoreCase(au.org.theark.core.Constants.FIELD_TYPE_NUMBER)
                || field.getFieldType().getName().equalsIgnoreCase(au.org.theark.core.Constants.FIELD_TYPE_DATE))) {
            errorMessages.add(PhenoDataSetFieldValidationMessage.fieldMinValueNotDefinedType(field));
            return isValid;
        }

        // Number field type
        if (field.getFieldType().getName().equalsIgnoreCase(au.org.theark.core.Constants.FIELD_TYPE_NUMBER)) {
            try {
                Float.parseFloat(field.getMinValue());
                isValid = true;
            } catch (NumberFormatException nfe) {
                errorMessages.add(PhenoDataSetFieldValidationMessage.fieldMinValueNotDefinedType(field));
                log.error("Field data number format exception " + nfe.getMessage());
                isValid = false;
            } catch (NullPointerException npe) {
                log.error("Field data null pointer exception " + npe.getMessage());
                isValid = false;
            }
        }

        // Date field type
        if (field.getFieldType().getName().equalsIgnoreCase(au.org.theark.core.Constants.FIELD_TYPE_DATE)) {
            try {
                DateFormat dateFormat = new SimpleDateFormat(au.org.theark.core.Constants.DD_MM_YYYY);
                dateFormat.setLenient(false);
                dateFormat.parse(field.getMinValue());
                isValid = true;
            } catch (ParseException pe) {
                errorMessages.add(PhenoDataSetFieldValidationMessage.fieldDefinitionMinValueNotValidDate(field));
                log.error("Field data date parse exception " + pe.getMessage());
                isValid = false;
            } catch (NullPointerException npe) {
                log.error("Field data null pointer exception " + npe.getMessage());
                isValid = false;
            }
        }
        return isValid;
    }

    private static boolean validateFieldType(String fieldName, String fieldType, Collection<String> errorMessages) {
        boolean isValid = false;

        if (fieldType.equalsIgnoreCase(au.org.theark.core.Constants.FIELD_TYPE_CHARACTER)
                || fieldType.equalsIgnoreCase(au.org.theark.core.Constants.FIELD_TYPE_NUMBER)
                || fieldType.equalsIgnoreCase(au.org.theark.core.Constants.FIELD_TYPE_DATE)) {
            isValid = true;
        } else {
            errorMessages.add(PhenoDataSetFieldValidationMessage.fieldTypeNotDefined(fieldName, fieldType));
            isValid = false;
        }

        return isValid;
    }

    /**
     * Return the progress of the current process in %
     * 
     * @return if a process is actively running, then progress in %; or if no process running, then returns -1
     */
    public double getProgress() {
        double progress = -1;

        if (srcLength > 0)
            progress = curPos * 100.0 / srcLength; // %

        return progress;
    }

    /**
    public double getSpeed() {
       double speed = -1;
        
       if (srcLength > 0)
     speed = curPos / 1024 / (timer.getTime() / 1000.0); // KB/s
        
       return speed;
    }
    */

    public HashSet<Integer> getInsertRows() {
        return insertRows;
    }

    public void setInsertRows(HashSet<Integer> insertRows) {
        this.insertRows = insertRows;
    }

    public HashSet<Integer> getUpdateRows() {
        return updateRows;
    }

    public void setUpdateRows(HashSet<Integer> updateRows) {
        this.updateRows = updateRows;
    }

    public HashSet<ArkGridCell> getInsertCells() {
        return insertCells;
    }

    public void setInsertCells(HashSet<ArkGridCell> insertCells) {
        this.insertCells = insertCells;
    }

    public HashSet<ArkGridCell> getUpdateCells() {
        return updateCells;
    }

    public void setUpdateCells(HashSet<ArkGridCell> updateCells) {
        this.updateCells = updateCells;
    }

    public HashSet<ArkGridCell> getErrorCells() {
        return errorCells;
    }

    public void setErrorCells(HashSet<ArkGridCell> errorCells) {
        this.errorCells = errorCells;
    }

    public HashSet<ArkGridCell> getWarningCells() {
        return warningCells;
    }

    public void setWarningCells(HashSet<ArkGridCell> warningCells) {
        this.warningCells = warningCells;
    }
}
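
For reference, a sketch of the header row that the structural check expects for a phenotypic data dictionary upload. This is an assumption based on the column names listed in the Javadoc of validateMatrixFileFormat; the authoritative list lives in au.org.theark.core.Constants.DATA_DICTIONARY_HEADER:

// Hypothetical example of the expected comma-delimited header row
String headerRow = String.join(",",
        "FIELD_NAME", "FIELD_TYPE", "DESCRIPTION", "QUESTION", "UNITS",
        "ENCODED_VALUES", "MINIMUM_VALUE", "MAXIMUM_VALUE", "MISSING_VALUE",
        "REQUIRED", "ALLOW_MULTIPLE_SELECTION");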