Example usage for weka.core Instances deleteAttributeType

List of usage examples for weka.core Instances deleteAttributeType

Introduction

On this page you can find example usages of weka.core.Instances.deleteAttributeType.

Prototype

public void deleteAttributeType(int attType) 

Source Link

Document

Deletes all attributes of the given type in the dataset.

Usage

From source file:sirius.trainer.step3.SelectFeaturePane.java

License:Open Source License

/**
 * Applies the given filter to a copy of dataset 1 on a low-priority background thread.
 *
 * <p>The work is only started when {@code applicationData.getOneThread()} is {@code null};
 * otherwise a warning dialog is shown and nothing else happens. The worker copies the
 * dataset, sets the class index of the copy to the last attribute position of the original
 * dataset, removes all string attributes from the copy, runs the filter and, on success,
 * records an undo point and installs the filtered data via {@code setDataset1Instances}.
 * Failures are reported in an error dialog.
 *
 * <p>The "one thread" marker is cleared in a {@code finally} block so that the pane never
 * stays permanently "busy", even if the worker dies with an {@code Error} or the error
 * dialog itself fails.
 *
 * @param filter the filter to apply; when {@code null} the worker does nothing except
 *               clear the busy marker
 */
protected void applyFilter(final Filter filter) {
    if (applicationData.getOneThread() != null) {
        // Another background job currently owns the single worker slot.
        JOptionPane.showMessageDialog(parent,
                "Can't apply filter at this time,\n" + "currently busy with other IO", "Apply Filter",
                JOptionPane.WARNING_MESSAGE);
        return;
    }

    applicationData.setOneThread(new Thread() {
        @Override
        public void run() {
            try {
                if (filter != null) {
                    // Work on a copy so the live dataset is only replaced once filtering succeeded.
                    Instances copy = new Instances(applicationData.getDataset1Instances());
                    // The class index is derived from the original dataset's attribute count,
                    // before the string attributes are removed from the copy.
                    copy.setClassIndex(applicationData.getDataset1Instances().numAttributes() - 1);
                    copy.deleteAttributeType(Attribute.STRING);
                    filter.setInputFormat(copy);
                    statusPane.setText("Applying Filter.. May take a while.. Please wait..");
                    Instances newInstances = Filter.useFilter(copy, filter);
                    if (newInstances == null || newInstances.numAttributes() < 1) {
                        throw new Exception("Dataset is empty.");
                    }
                    addUndoPoint();
                    // If the class was not set before, reset it again after use of the filter.
                    if (applicationData.getDataset1Instances().classIndex() < 0) {
                        newInstances.setClassIndex(-1);
                    }
                    setDataset1Instances(newInstances);
                    statusPane.setText("Filter Applied..");
                }
            } catch (Exception ex) {
                // Pop up an error option pane.
                ex.printStackTrace();
                JOptionPane.showMessageDialog(parent, "Problem filtering instances:\n" + ex.getMessage(),
                        "Apply Filter", JOptionPane.ERROR_MESSAGE);
            } finally {
                // Always release the worker slot, whatever happened above.
                applicationData.setOneThread(null);
            }
        }
    });
    applicationData.getOneThread().setPriority(Thread.MIN_PRIORITY); // UI has most priority
    applicationData.getOneThread().start();
}

From source file:sirius.trainer.step4.DatasetGenerator.java

License:Open Source License

/**
 * Writes {@code Dataset2.arff} into the working directory.
 *
 * <p>The header declares one numeric attribute per position from
 * {@code classifierTwoUpstream} to {@code classifierTwoDownstream} (position 0 is skipped)
 * plus a nominal {@code Class} attribute. Each data row holds, for one sequence, the first
 * element of {@code classifierOne.distributionForInstance(...)} for every shift produced by
 * {@code SequenceManipulation.nextShift()}, followed by the class label. The label starts
 * as {@code "pos"} and switches to {@code "neg"} once the number of processed sequences
 * equals the size of the positive range.
 *
 * <p>The output writer is closed in a {@code finally} block so the file handle is released
 * on every exit path, including exceptions raised by the classifier or the FASTA reader.
 *
 * @param parent                  parent component for the error dialog
 * @param applicationData         shared application state (ranges, dataset 1, working directory, ...)
 * @param classifierTwoUpstream   first position written to the header and used for shifting
 * @param classifierTwoDownstream last position written to the header and used for shifting
 * @param classifierOne           classifier whose scores make up the rows
 * @return {@code true} when the file was written completely; {@code false} when the run was
 *         interrupted via {@code applicationData.terminateThread} or an exception occurred
 */
public static boolean generateDataset2(JInternalFrame parent, ApplicationData applicationData,
        int classifierTwoUpstream, int classifierTwoDownstream, Classifier classifierOne) {
    // Declared outside the try so the finally block can always release the file handle.
    BufferedWriter dataset2OutputFile = null;
    try {
        StatusPane statusPane = applicationData.getStatusPane();

        int positiveDataset2FromInt = applicationData.getPositiveDataset2FromField();
        int positiveDataset2ToInt = applicationData.getPositiveDataset2ToField();
        int negativeDataset2FromInt = applicationData.getNegativeDataset2FromField();
        int negativeDataset2ToInt = applicationData.getNegativeDataset2ToField();

        int totalDataset2PositiveInstances = positiveDataset2ToInt - positiveDataset2FromInt + 1;
        int totalDataset2NegativeInstances = negativeDataset2ToInt - negativeDataset2FromInt + 1;
        int totalDataset2Instances = totalDataset2PositiveInstances + totalDataset2NegativeInstances;

        int scoringMatrixIndex = applicationData.getScoringMatrixIndex();
        int countingStyleIndex = applicationData.getCountingStyleIndex();

        // Generate the header for Dataset2.arff
        dataset2OutputFile = new BufferedWriter(
                new FileWriter(applicationData.getWorkingDirectory() + File.separator + "Dataset2.arff"));
        dataset2OutputFile.write("@relation 'Dataset2.arff' ");
        dataset2OutputFile.newLine();
        dataset2OutputFile.newLine();
        dataset2OutputFile.flush();
        for (int x = classifierTwoUpstream; x <= classifierTwoDownstream; x++) {
            // Sequence positions go ..., -1, +1, ...; there is no position 0.
            if (x != 0) {
                dataset2OutputFile.write("@attribute (" + x + ") numeric");
                dataset2OutputFile.newLine();
                dataset2OutputFile.flush();
            }
        }
        // A "from" value of 0 is treated as "this class is absent" when declaring the labels.
        if (positiveDataset2FromInt > 0 && negativeDataset2FromInt > 0) {
            dataset2OutputFile.write("@attribute Class {pos,neg}");
        } else if (positiveDataset2FromInt > 0 && negativeDataset2FromInt == 0) {
            dataset2OutputFile.write("@attribute Class {pos}");
        } else if (positiveDataset2FromInt == 0 && negativeDataset2FromInt > 0) {
            dataset2OutputFile.write("@attribute Class {neg}");
        }
        dataset2OutputFile.newLine();
        dataset2OutputFile.newLine();
        dataset2OutputFile.write("@data");
        dataset2OutputFile.newLine();
        dataset2OutputFile.newLine();
        dataset2OutputFile.flush();

        // Dataset 1 supplies the attribute layout for the temporary instances built below.
        // NOTE(review): string attributes are deleted from the Instances object returned by
        // applicationData.getDataset1Instances() itself, not from a copy - confirm that
        // mutating the shared dataset is intended.
        Instances inst = applicationData.getDataset1Instances();
        inst.deleteAttributeType(Attribute.STRING);
        FastaFileManipulation fastaFile = new FastaFileManipulation(
                applicationData.getPositiveStep1TableModel(), applicationData.getNegativeStep1TableModel(),
                positiveDataset2FromInt, positiveDataset2ToInt, negativeDataset2FromInt, negativeDataset2ToInt,
                applicationData.getWorkingDirectory());

        // Read and store the feature list; the last attribute is the class and is ignored.
        ArrayList<Feature> featureDataArrayList = new ArrayList<Feature>();
        for (int x = 0; x < inst.numAttributes() - 1; x++) {
            featureDataArrayList.add(Feature.levelOneClassifierPane(inst.attribute(x).name()));
        }

        // Read the fasta file, one sequence per output row.
        int lineCounter = 0;
        String _class = "pos";
        FastaFormat fastaFormat;
        while ((fastaFormat = fastaFile.nextSequence(_class)) != null) {
            if (applicationData.terminateThread == true) {
                statusPane.setText("Interrupted - Classifier Two Training Not Complete");
                dataset2OutputFile.close();
                return false;
            }
            lineCounter++; // incremented first, so lineCounter == x means "sequence x"
            dataset2OutputFile.flush();
            statusPane.setText("Generating Dataset2.arff.. @ " + lineCounter + " / " + totalDataset2Instances
                    + " Sequences");
            // For each sequence, shift the +1 location from upstream to downstream and collect
            // the score classifier one gives at every shift; these scores are the row values.
            SequenceManipulation seq = new SequenceManipulation(fastaFormat.getSequence(),
                    classifierTwoUpstream, classifierTwoDownstream);
            String line2;
            while ((line2 = seq.nextShift()) != null) {
                Instance tempInst = new Instance(inst.numAttributes());
                tempInst.setDataset(inst);
                for (int x = 0; x < inst.numAttributes() - 1; x++) {
                    // Compute the value of feature x on the shifted sequence.
                    Object obj = GenerateArff.getMatchCount(fastaFormat.getHeader(), line2,
                            featureDataArrayList.get(x), scoringMatrixIndex, countingStyleIndex,
                            applicationData.getScoringMatrix());
                    if (obj instanceof Integer) {
                        tempInst.setValue(x, (Integer) obj);
                    } else if (obj instanceof Double) {
                        tempInst.setValue(x, (Double) obj);
                    } else if (obj instanceof String) {
                        tempInst.setValue(x, (String) obj);
                    } else {
                        // The writer is released by the finally block.
                        throw new Error("Unknown: " + obj.getClass().getName());
                    }
                }
                tempInst.setValue(inst.numAttributes() - 1, _class);
                double[] results = classifierOne.distributionForInstance(tempInst);
                dataset2OutputFile.write("" + results[0] + ",");
            }
            dataset2OutputFile.write(_class);
            dataset2OutputFile.newLine();
            // All positive sequences consumed: switch to the negative ones.
            if (lineCounter == totalDataset2PositiveInstances) {
                _class = "neg";
            }
        }
        // Close explicitly on the success path so a failing flush is still reported below.
        dataset2OutputFile.close();
        fastaFile.cleanUp();
    } catch (Exception e) {
        e.printStackTrace();
        JOptionPane.showMessageDialog(parent, e.getMessage(), "ERROR", JOptionPane.ERROR_MESSAGE);
        applicationData.getStatusPane().setText("Error - Classifier Two Training Not Complete");
        return false;
    } finally {
        // Safety net for every exit path; closing an already closed writer has no effect.
        if (dataset2OutputFile != null) {
            try {
                dataset2OutputFile.close();
            } catch (java.io.IOException closeEx) {
                closeEx.printStackTrace();
            }
        }
    }
    return true;
}

From source file:sirius.trainer.step4.RunClassifier.java

License:Open Source License

/**
 * Trains classifier one on dataset 1 and, when {@code test} is set, scores dataset 3 with it.
 *
 * <p>Training uses a copy of dataset 1 with the last attribute as class and all string
 * attributes removed. When testing, every sequence of dataset 3 is written to
 * {@code ClassifierOne.scores} in the working directory as header line, sequence line and a
 * score line of the form {@code class,position=score,...}; position 0 is skipped. The file
 * is then handed to {@code PredictionStats} and the resulting statistics are pushed to the
 * result list, the text area, the application data and the graph.
 *
 * <p>The score writer is closed in a {@code finally} block so the file handle is released
 * on every exit path, including exceptions thrown while scoring.
 *
 * @param parent                       parent component for the error dialog
 * @param applicationData              shared application state
 * @param classifierOneDisplayTextArea text area passed to {@code PredictionStats.updateDisplay}
 * @param m_ClassifierEditor           editor whose value is the classifier to train
 * @param myGraph                      graph that receives the computed statistics
 * @param test                         when {@code false}, only training is performed
 * @param classifierResults            result lists that are updated with names and timings
 * @param range                        passed through to {@code PredictionStats}
 * @param threshold                    passed through to {@code PredictionStats}
 * @return the trained classifier (also when interrupted after training), or {@code null}
 *         when an exception occurred
 */
public static Classifier startClassifierOne(JInternalFrame parent, ApplicationData applicationData,
        JTextArea classifierOneDisplayTextArea, GenericObjectEditor m_ClassifierEditor, GraphPane myGraph,
        boolean test, ClassifierResults classifierResults, int range, double threshold) {
    // Declared outside the try so the finally block can always release the file handle.
    BufferedWriter dataset3OutputFile = null;
    try {
        StatusPane statusPane = applicationData.getStatusPane();

        long totalTimeStart = System.currentTimeMillis(), totalTimeElapsed;
        // Setting up training dataset 1 for classifier one
        statusPane.setText("Setting up...");
        // Train on a copy; the class is the last attribute.
        Instances inst = new Instances(applicationData.getDataset1Instances());
        inst.setClassIndex(applicationData.getDataset1Instances().numAttributes() - 1);
        // The class index is also set on the shared dataset itself.
        applicationData.getDataset1Instances()
                .setClassIndex(applicationData.getDataset1Instances().numAttributes() - 1);
        Classifier classifierOne = (Classifier) m_ClassifierEditor.getValue();
        statusPane.setText("Training Classifier One... May take a while... Please wait...");
        // Timing covers the removal of string attributes and the training itself.
        long trainTimeStart = System.currentTimeMillis();
        inst.deleteAttributeType(Attribute.STRING);
        classifierOne.buildClassifier(inst);
        long trainTimeElapsed = System.currentTimeMillis() - trainTimeStart;

        String classifierName = m_ClassifierEditor.getValue().getClass().getName();
        classifierResults.updateList(classifierResults.getClassifierList(), "Classifier: ", classifierName);
        classifierResults.updateList(classifierResults.getClassifierList(), "Training Data: ",
                applicationData.getWorkingDirectory() + File.separator + "Dataset1.arff");
        classifierResults.updateList(classifierResults.getClassifierList(), "Time Used: ",
                Utils.doubleToString(trainTimeElapsed / 1000.0, 2) + " seconds");

        if (test == false) {
            statusPane.setText("Classifier One Training Completed...Done...");
            return classifierOne;
        }
        if (applicationData.terminateThread == true) {
            statusPane.setText("Interrupted - Classifier One Training Completed");
            return classifierOne;
        }
        // Running classifier one on dataset3
        statusPane.setText("Running ClassifierOne on Dataset 3..");
        int positiveDataset3FromInt = applicationData.getPositiveDataset3FromField();
        int positiveDataset3ToInt = applicationData.getPositiveDataset3ToField();
        int negativeDataset3FromInt = applicationData.getNegativeDataset3FromField();
        int negativeDataset3ToInt = applicationData.getNegativeDataset3ToField();

        // Open ClassifierOne.scores for the dataset 3 predictions
        dataset3OutputFile = new BufferedWriter(new FileWriter(
                applicationData.getWorkingDirectory() + File.separator + "ClassifierOne.scores"));
        // NOTE(review): the extended name is not read again in this method.
        if (m_ClassifierEditor.getValue() instanceof OptionHandler) {
            classifierName += " "
                    + Utils.joinOptions(((OptionHandler) m_ClassifierEditor.getValue()).getOptions());
        }

        FastaFileManipulation fastaFile = new FastaFileManipulation(
                applicationData.getPositiveStep1TableModel(), applicationData.getNegativeStep1TableModel(),
                positiveDataset3FromInt, positiveDataset3ToInt, negativeDataset3FromInt, negativeDataset3ToInt,
                applicationData.getWorkingDirectory());

        // Read and store the feature list; the last attribute is the class and is ignored.
        ArrayList<Feature> featureDataArrayList = new ArrayList<Feature>();
        for (int x = 0; x < inst.numAttributes() - 1; x++) {
            featureDataArrayList.add(Feature.levelOneClassifierPane(inst.attribute(x).name()));
        }

        // Read the fasta file
        int lineCounter = 0;
        String _class = "pos";
        int totalDataset3PositiveInstances = positiveDataset3ToInt - positiveDataset3FromInt + 1;
        FastaFormat fastaFormat;
        while ((fastaFormat = fastaFile.nextSequence(_class)) != null) {
            if (applicationData.terminateThread == true) {
                statusPane.setText("Interrupted - Classifier One Training Completed");
                dataset3OutputFile.close();
                return classifierOne;
            }
            lineCounter++; // incremented first, so lineCounter == x means "sequence x"
            dataset3OutputFile.write(fastaFormat.getHeader());
            dataset3OutputFile.newLine();
            dataset3OutputFile.write(fastaFormat.getSequence());
            dataset3OutputFile.newLine();
            statusPane.setText("Running Classifier One on Dataset 3.. @ " + lineCounter + " / "
                    + applicationData.getTotalSequences(3) + " Sequences");

            if (fastaFormat.getIndexLocation() == -1) {
                // A +1 index of -1 is not expected here; the writer is released by the finally block.
                throw new Exception("SHOULD NOT HAVE REACHED HERE!!");
            }

            // Shift the +1 location across the predictable range and record the score
            // classifier one gives at every position.
            int predictPosition[] = fastaFormat.getPredictPositionForClassifierOne(
                    applicationData.getLeftMostPosition(), applicationData.getRightMostPosition());

            SequenceManipulation seq = new SequenceManipulation(fastaFormat.getSequence(),
                    predictPosition[0], predictPosition[1]);
            String line2;
            int currentPosition = predictPosition[0];
            dataset3OutputFile.write(_class);
            while ((line2 = seq.nextShift()) != null) {
                Instance tempInst = new Instance(inst.numAttributes());
                tempInst.setDataset(inst);
                for (int x = 0; x < inst.numAttributes() - 1; x++) {
                    // Compute the value of feature x on the shifted sequence.
                    Object obj = GenerateArff.getMatchCount(fastaFormat.getHeader(), line2,
                            featureDataArrayList.get(x), applicationData.getScoringMatrixIndex(),
                            applicationData.getCountingStyleIndex(), applicationData.getScoringMatrix());
                    if (obj instanceof Integer) {
                        tempInst.setValue(x, (Integer) obj);
                    } else if (obj instanceof Double) {
                        tempInst.setValue(x, (Double) obj);
                    } else if (obj instanceof String) {
                        tempInst.setValue(x, (String) obj);
                    } else {
                        // The writer is released by the finally block.
                        throw new Error("Unknown: " + obj.getClass().getName());
                    }
                }
                tempInst.setValue(inst.numAttributes() - 1, _class);
                double[] results = classifierOne.distributionForInstance(tempInst);
                dataset3OutputFile.write("," + currentPosition + "=" + results[0]);
                // Positions run ..., -1, +1, ...; skip 0.
                currentPosition++;
                if (currentPosition == 0) {
                    currentPosition++;
                }
            }
            dataset3OutputFile.newLine();
            dataset3OutputFile.flush();
            // All positive sequences consumed: switch to the negative ones.
            if (lineCounter == totalDataset3PositiveInstances) {
                _class = "neg";
            }
        }
        // Must be closed before PredictionStats is given the same file path.
        dataset3OutputFile.close();
        PredictionStats classifierOneStatsOnBlindTest = new PredictionStats(
                applicationData.getWorkingDirectory() + File.separator + "ClassifierOne.scores", range,
                threshold);
        totalTimeElapsed = System.currentTimeMillis() - totalTimeStart;
        // Integer division yields whole minutes; the remainder is reported as seconds.
        classifierResults.updateList(classifierResults.getResultsList(), "Total Time Used: ",
                Utils.doubleToString(totalTimeElapsed / 60000, 2) + " minutes "
                        + Utils.doubleToString((totalTimeElapsed / 1000.0) % 60.0, 2) + " seconds");
        classifierOneStatsOnBlindTest.updateDisplay(classifierResults, classifierOneDisplayTextArea, true);
        applicationData.setClassifierOneStats(classifierOneStatsOnBlindTest);
        myGraph.setMyStats(classifierOneStatsOnBlindTest);
        statusPane.setText("Done!");
        fastaFile.cleanUp();
        return classifierOne;
    } catch (Exception ex) {
        ex.printStackTrace();
        JOptionPane.showMessageDialog(parent, ex.getMessage() + "Classifier One on Blind Test Set",
                "Evaluate classifier", JOptionPane.ERROR_MESSAGE);
        return null;
    } finally {
        // Safety net for every exit path; closing an already closed writer has no effect.
        if (dataset3OutputFile != null) {
            try {
                dataset3OutputFile.close();
            } catch (java.io.IOException closeEx) {
                closeEx.printStackTrace();
            }
        }
    }
}

From source file:sirius.trainer.step4.RunClassifier.java

License:Open Source License

public static Classifier xValidateClassifierOne(JInternalFrame parent, ApplicationData applicationData,
        JTextArea classifierOneDisplayTextArea, GenericObjectEditor m_ClassifierEditor, int folds,
        GraphPane myGraph, ClassifierResults classifierResults, int range, double threshold,
        boolean outputClassifier) {
    try {/*from  w  ww. ja  v  a 2 s  . co  m*/
        StatusPane statusPane = applicationData.getStatusPane();

        long totalTimeStart = System.currentTimeMillis(), totalTimeElapsed;
        //Classifier tempClassifier = (Classifier) m_ClassifierEditor.getValue();
        int positiveDataset1FromInt = applicationData.getPositiveDataset1FromField();
        int positiveDataset1ToInt = applicationData.getPositiveDataset1ToField();
        int negativeDataset1FromInt = applicationData.getNegativeDataset1FromField();
        int negativeDataset1ToInt = applicationData.getNegativeDataset1ToField();

        Step1TableModel positiveStep1TableModel = applicationData.getPositiveStep1TableModel();
        Step1TableModel negativeStep1TableModel = applicationData.getNegativeStep1TableModel();

        Instances inst = new Instances(applicationData.getDataset1Instances());
        inst.setClassIndex(applicationData.getDataset1Instances().numAttributes() - 1);

        //Train classifier one with the full dataset first then do cross-validation to gauge its accuracy                    
        long trainTimeStart = 0, trainTimeElapsed = 0;
        Classifier classifierOne = (Classifier) m_ClassifierEditor.getValue();
        statusPane.setText("Training Classifier One... May take a while... Please wait...");
        //Record Start Time
        trainTimeStart = System.currentTimeMillis();
        inst.deleteAttributeType(Attribute.STRING);
        if (outputClassifier)
            classifierOne.buildClassifier(inst);
        //Record Total Time used to build classifier one
        trainTimeElapsed = System.currentTimeMillis() - trainTimeStart;
        //Training Done                        

        String classifierName = m_ClassifierEditor.getValue().getClass().getName();
        classifierResults.updateList(classifierResults.getClassifierList(), "Classifier: ", classifierName);
        classifierResults.updateList(classifierResults.getClassifierList(), "Training Data: ",
                folds + " fold cross-validation on Dataset1.arff");
        classifierResults.updateList(classifierResults.getClassifierList(), "Time Used: ",
                Utils.doubleToString(trainTimeElapsed / 1000.0, 2) + " seconds");

        //Reading and Storing the featureList
        ArrayList<Feature> featureDataArrayList = new ArrayList<Feature>();
        for (int y = 0; y < inst.numAttributes() - 1; y++) {
            featureDataArrayList.add(Feature.levelOneClassifierPane(inst.attribute(y).name()));
        }

        BufferedWriter outputCrossValidation = new BufferedWriter(new FileWriter(
                applicationData.getWorkingDirectory() + File.separator + "ClassifierOne.scores"));

        for (int x = 0; x < folds; x++) {
            File trainFile = new File(applicationData.getWorkingDirectory() + File.separator
                    + "trainingDataset1_" + (x + 1) + ".arff");
            File testFile = new File(applicationData.getWorkingDirectory() + File.separator + "testingDataset1_"
                    + (x + 1) + ".fasta");
            //AHFU_DEBUG
            //Generate also the training file in fasta format for debugging purpose
            File trainFileFasta = new File(applicationData.getWorkingDirectory() + File.separator
                    + "trainingDataset1_" + (x + 1) + ".fasta");
            //AHFU_DEBUG_END

            //AHFU_DEBUG - This part is to generate the TestClassifierTwo.arff for use in WEKA to test classifierTwo
            //TestClassifierTwo.arff - predictions scores from Set Upstream Field to Set Downstream Field
            //Now first generate the header for TestClassifierTwo.arff
            BufferedWriter testClassifierTwoArff = new BufferedWriter(
                    new FileWriter(applicationData.getWorkingDirectory() + File.separator + "TestClassifierTwo_"
                            + (x + 1) + ".arff"));
            int setClassifierTwoUpstreamInt = -40;
            int setClassifierTwoDownstreamInt = 41;
            testClassifierTwoArff.write("@relation \'Used to Test Classifier Two\'");
            testClassifierTwoArff.newLine();
            for (int d = setClassifierTwoUpstreamInt; d <= setClassifierTwoDownstreamInt; d++) {
                if (d == 0)
                    continue;
                testClassifierTwoArff.write("@attribute (" + d + ") numeric");
                testClassifierTwoArff.newLine();
            }
            if (positiveDataset1FromInt > 0 && negativeDataset1FromInt > 0)
                testClassifierTwoArff.write("@attribute Class {pos,neg}");
            else if (positiveDataset1FromInt > 0 && negativeDataset1FromInt == 0)
                testClassifierTwoArff.write("@attribute Class {pos}");
            else if (positiveDataset1FromInt == 0 && negativeDataset1FromInt > 0)
                testClassifierTwoArff.write("@attribute Class {neg}");
            testClassifierTwoArff.newLine();
            testClassifierTwoArff.newLine();
            testClassifierTwoArff.write("@data");
            testClassifierTwoArff.newLine();
            testClassifierTwoArff.newLine();
            //END of AHFU_DEBUG
            statusPane.setText("Building Fold " + (x + 1) + "...");
            FastaFileManipulation fastaFile = new FastaFileManipulation(positiveStep1TableModel,
                    negativeStep1TableModel, positiveDataset1FromInt, positiveDataset1ToInt,
                    negativeDataset1FromInt, negativeDataset1ToInt, applicationData.getWorkingDirectory());

            //1) generate trainingDatasetX.arff headings
            BufferedWriter trainingOutputFile = new BufferedWriter(
                    new FileWriter(applicationData.getWorkingDirectory() + File.separator + "trainingDataset1_"
                            + (x + 1) + ".arff"));
            trainingOutputFile.write("@relation 'A temp file for X-validation purpose' ");
            trainingOutputFile.newLine();
            trainingOutputFile.newLine();
            trainingOutputFile.flush();

            for (int y = 0; y < inst.numAttributes() - 1; y++) {
                if (inst.attribute(y).type() == Attribute.NUMERIC)
                    trainingOutputFile.write("@attribute " + inst.attribute(y).name() + " numeric");
                else if (inst.attribute(y).type() == Attribute.STRING)
                    trainingOutputFile.write("@attribute " + inst.attribute(y).name() + " String");
                else {
                    testClassifierTwoArff.close();
                    outputCrossValidation.close();
                    trainingOutputFile.close();
                    throw new Error("Unknown type: " + inst.attribute(y).name());
                }
                trainingOutputFile.newLine();
                trainingOutputFile.flush();
            }
            if (positiveDataset1FromInt > 0 && negativeDataset1FromInt > 0)
                trainingOutputFile.write("@attribute Class {pos,neg}");
            else if (positiveDataset1FromInt > 0 && negativeDataset1FromInt == 0)
                trainingOutputFile.write("@attribute Class {pos}");
            else if (positiveDataset1FromInt == 0 && negativeDataset1FromInt > 0)
                trainingOutputFile.write("@attribute Class {neg}");
            trainingOutputFile.newLine();
            trainingOutputFile.newLine();
            trainingOutputFile.write("@data");
            trainingOutputFile.newLine();
            trainingOutputFile.newLine();
            trainingOutputFile.flush();

            //2) generate testingDataset1.fasta
            BufferedWriter testingOutputFile = new BufferedWriter(
                    new FileWriter(applicationData.getWorkingDirectory() + File.separator + "testingDataset1_"
                            + (x + 1) + ".fasta"));

            //AHFU_DEBUG
            //Open the IOStream for training file (fasta format)
            BufferedWriter trainingOutputFileFasta = new BufferedWriter(
                    new FileWriter(applicationData.getWorkingDirectory() + File.separator + "trainingDataset1_"
                            + (x + 1) + ".fasta"));
            //AHFU_DEBUG_END

            //Now, populating data for both the training and testing files            
            int fastaFileLineCounter = 0;
            int posTestSequenceCounter = 0;
            int totalTestSequenceCounter = 0;
            //For pos sequences   
            FastaFormat fastaFormat;
            while ((fastaFormat = fastaFile.nextSequence("pos")) != null) {
                if ((fastaFileLineCounter % folds) == x) {//This sequence for testing
                    testingOutputFile.write(fastaFormat.getHeader());
                    testingOutputFile.newLine();
                    testingOutputFile.write(fastaFormat.getSequence());
                    testingOutputFile.newLine();
                    testingOutputFile.flush();
                    posTestSequenceCounter++;
                    totalTestSequenceCounter++;
                } else {//for training
                    for (int z = 0; z < inst.numAttributes() - 1; z++) {
                        trainingOutputFile.write(GenerateArff.getMatchCount(fastaFormat,
                                featureDataArrayList.get(z), applicationData.getScoringMatrixIndex(),
                                applicationData.getCountingStyleIndex(), applicationData.getScoringMatrix())
                                + ",");
                    }
                    trainingOutputFile.write("pos");
                    trainingOutputFile.newLine();
                    trainingOutputFile.flush();

                    //AHFU_DEBUG
                    //Write the datas into the training file in fasta format
                    trainingOutputFileFasta.write(fastaFormat.getHeader());
                    trainingOutputFileFasta.newLine();
                    trainingOutputFileFasta.write(fastaFormat.getSequence());
                    trainingOutputFileFasta.newLine();
                    trainingOutputFileFasta.flush();
                    //AHFU_DEBUG_END
                }
                fastaFileLineCounter++;
            }
            //For neg sequences
            fastaFileLineCounter = 0;
            while ((fastaFormat = fastaFile.nextSequence("neg")) != null) {
                if ((fastaFileLineCounter % folds) == x) {//This sequence for testing
                    testingOutputFile.write(fastaFormat.getHeader());
                    testingOutputFile.newLine();
                    testingOutputFile.write(fastaFormat.getSequence());
                    testingOutputFile.newLine();
                    testingOutputFile.flush();
                    totalTestSequenceCounter++;
                } else {//for training
                    for (int z = 0; z < inst.numAttributes() - 1; z++) {
                        trainingOutputFile.write(GenerateArff.getMatchCount(fastaFormat,
                                featureDataArrayList.get(z), applicationData.getScoringMatrixIndex(),
                                applicationData.getCountingStyleIndex(), applicationData.getScoringMatrix())
                                + ",");
                    }
                    trainingOutputFile.write("neg");
                    trainingOutputFile.newLine();
                    trainingOutputFile.flush();

                    //AHFU_DEBUG
                    //Write the datas into the training file in fasta format
                    trainingOutputFileFasta.write(fastaFormat.getHeader());
                    trainingOutputFileFasta.newLine();
                    trainingOutputFileFasta.write(fastaFormat.getSequence());
                    trainingOutputFileFasta.newLine();
                    trainingOutputFileFasta.flush();
                    //AHFU_DEBUG_END
                }
                fastaFileLineCounter++;
            }
            trainingOutputFileFasta.close();
            trainingOutputFile.close();
            testingOutputFile.close();
            //3) train and test the classifier then store the statistics              
            Classifier foldClassifier = (Classifier) m_ClassifierEditor.getValue();
            Instances instFoldTrain = new Instances(
                    new BufferedReader(new FileReader(applicationData.getWorkingDirectory() + File.separator
                            + "trainingDataset1_" + (x + 1) + ".arff")));
            instFoldTrain.setClassIndex(instFoldTrain.numAttributes() - 1);
            foldClassifier.buildClassifier(instFoldTrain);

            //Reading the test file
            statusPane.setText("Evaluating fold " + (x + 1) + "..");
            BufferedReader testingInput = new BufferedReader(
                    new FileReader(applicationData.getWorkingDirectory() + File.separator + "testingDataset1_"
                            + (x + 1) + ".fasta"));
            int lineCounter = 0;
            String lineHeader;
            String lineSequence;
            while ((lineHeader = testingInput.readLine()) != null) {
                if (applicationData.terminateThread == true) {
                    statusPane.setText("Interrupted - Classifier One Training Completed");
                    testingInput.close();
                    testClassifierTwoArff.close();
                    return classifierOne;
                }
                lineSequence = testingInput.readLine();
                outputCrossValidation.write(lineHeader);
                outputCrossValidation.newLine();
                outputCrossValidation.write(lineSequence);
                outputCrossValidation.newLine();
                lineCounter++;
                //For each sequence, you want to shift from upstream till downstream 
                //ie changing the +1 location
                //to get the scores by classifier one so that can use it to train classifier two later
                //Doing shift from upstream till downstream    
                //if(lineCounter % 100 == 0)
                statusPane.setText("Evaluating fold " + (x + 1) + ".. @ " + lineCounter + " / "
                        + totalTestSequenceCounter);

                fastaFormat = new FastaFormat(lineHeader, lineSequence);
                int predictPosition[] = fastaFormat.getPredictPositionForClassifierOne(
                        applicationData.getLeftMostPosition(), applicationData.getRightMostPosition());

                SequenceManipulation seq = new SequenceManipulation(lineSequence, predictPosition[0],
                        predictPosition[1]);
                int currentPosition = predictPosition[0];
                String line2;
                if (lineCounter > posTestSequenceCounter)
                    outputCrossValidation.write("neg");
                else
                    outputCrossValidation.write("pos");
                while ((line2 = seq.nextShift()) != null) {
                    Instance tempInst;
                    tempInst = new Instance(inst.numAttributes());
                    tempInst.setDataset(inst);
                    for (int i = 0; i < inst.numAttributes() - 1; i++) {
                        //-1 because class attribute can be ignored
                        //Give the sequence and the featureList to get the feature freqs on the sequence
                        Object obj = GenerateArff.getMatchCount(lineHeader, line2, featureDataArrayList.get(i),
                                applicationData.getScoringMatrixIndex(),
                                applicationData.getCountingStyleIndex(), applicationData.getScoringMatrix());
                        if (obj.getClass().getName().equalsIgnoreCase("java.lang.Integer"))
                            tempInst.setValue(x, (Integer) obj);
                        else if (obj.getClass().getName().equalsIgnoreCase("java.lang.Double"))
                            tempInst.setValue(x, (Double) obj);
                        else if (obj.getClass().getName().equalsIgnoreCase("java.lang.String"))
                            tempInst.setValue(x, (String) obj);
                        else {
                            testingInput.close();
                            testClassifierTwoArff.close();
                            outputCrossValidation.close();
                            throw new Error("Unknown: " + obj.getClass().getName());
                        }
                    }
                    if (lineCounter > posTestSequenceCounter)
                        tempInst.setValue(inst.numAttributes() - 1, "neg");
                    else
                        tempInst.setValue(inst.numAttributes() - 1, "pos");
                    double[] results = foldClassifier.distributionForInstance(tempInst);
                    outputCrossValidation.write("," + currentPosition + "=" + results[0]);
                    //AHFU_DEBUG 
                    double[] resultsDebug = classifierOne.distributionForInstance(tempInst);
                    if (currentPosition >= setClassifierTwoUpstreamInt
                            && currentPosition <= setClassifierTwoDownstreamInt)
                        testClassifierTwoArff.write(resultsDebug[0] + ",");
                    //AHFU_DEBUG_END
                    currentPosition++;
                    if (currentPosition == 0)
                        currentPosition++;
                } //end of sequence shift                               
                outputCrossValidation.newLine();
                outputCrossValidation.flush();
                //AHFU_DEBUG
                if (lineCounter > posTestSequenceCounter)
                    testClassifierTwoArff.write("neg");
                else
                    testClassifierTwoArff.write("pos");
                testClassifierTwoArff.newLine();
                testClassifierTwoArff.flush();
                //AHFU_DEBUG_END
            } //end of reading test file
            outputCrossValidation.close();
            testingInput.close();
            testClassifierTwoArff.close();
            fastaFile.cleanUp();

            //NORMAL MODE
            //trainFile.delete();
            //testFile.delete();
            //NORMAL MODE END
            //AHFU_DEBUG MODE
            //testClassifierTwoArff.close();            
            trainFile.deleteOnExit();
            testFile.deleteOnExit();
            trainFileFasta.deleteOnExit();
            //AHFU_DEBUG_MODE_END
        } //end of for loop for xvalidation

        PredictionStats classifierOneStatsOnXValidation = new PredictionStats(
                applicationData.getWorkingDirectory() + File.separator + "ClassifierOne.scores", range,
                threshold);
        //display(double range)
        totalTimeElapsed = System.currentTimeMillis() - totalTimeStart;
        classifierResults.updateList(classifierResults.getResultsList(), "Total Time Used: ",
                Utils.doubleToString(totalTimeElapsed / 60000, 2) + " minutes "
                        + Utils.doubleToString((totalTimeElapsed / 1000.0) % 60.0, 2) + " seconds");
        classifierOneStatsOnXValidation.updateDisplay(classifierResults, classifierOneDisplayTextArea, true);
        applicationData.setClassifierOneStats(classifierOneStatsOnXValidation);
        myGraph.setMyStats(classifierOneStatsOnXValidation);

        statusPane.setText("Done!");

        return classifierOne;
    } catch (Exception e) {
        e.printStackTrace();
        JOptionPane.showMessageDialog(parent, e.getMessage(), "ERROR", JOptionPane.ERROR_MESSAGE);
        return null;
    }
}

From source file:sirius.trainer.step4.RunClassifierWithNoLocationIndex.java

License:Open Source License

/**
 * Trains classifier one on Dataset 1 and, if requested, evaluates it on Dataset 3.
 *
 * <p>When {@code gaDialog} is non-null, Dataset 1 is first replaced by the result of
 * {@code runDAandLoadResult} over the positive and negative Dataset 1 sequences. The classifier
 * named by {@code classifierName}/{@code classifierOptions} is then built on a copy of Dataset 1
 * from which all STRING attributes have been removed. When {@code test} is set, every Dataset 3
 * sequence is scored and written to {@code ClassifierOne_<randomNumberForClassifier>.scores} in
 * the working directory, and prediction statistics are computed from that file and stored in
 * {@code applicationData}.
 *
 * @param parent parent frame of the error dialog shown when an exception is caught
 * @param applicationData source of the datasets, working directory, scoring settings, status
 *        pane and the {@code terminateThread} flag
 * @param classifierOneDisplayTextArea text area handed to {@code PredictionStats.updateDisplay}
 * @param myGraph graph pane that receives the statistics; may be {@code null}
 * @param test {@code false} to stop right after training
 * @param classifierResults result lists to update; may be {@code null}
 * @param range passed through to {@code PredictionStats}
 * @param threshold passed through to {@code PredictionStats}
 * @param classifierName class name handed to {@code Classifier.forName}
 * @param classifierOptions options handed to {@code Classifier.forName}
 * @param returnClassifier after a completed test run, {@code true} returns the classifier and
 *        {@code false} returns the statistics
 * @param gaDialog when non-null, Dataset 1 is regenerated through {@code runDAandLoadResult}
 * @param randomNumberForClassifier suffix used in the scores file name
 * @return the trained classifier when {@code test} is {@code false}, when the run is interrupted
 *         through {@code applicationData.terminateThread}, or when {@code returnClassifier} is
 *         {@code true}; otherwise the {@code PredictionStats} of the Dataset 3 run; {@code null}
 *         when an exception was caught (an error dialog is shown first)
 */
public static Object startClassifierOneWithNoLocationIndex(JInternalFrame parent,
        ApplicationData applicationData, JTextArea classifierOneDisplayTextArea, GraphPane myGraph,
        boolean test, ClassifierResults classifierResults, int range, double threshold, String classifierName,
        String[] classifierOptions, boolean returnClassifier, GeneticAlgorithmDialog gaDialog,
        int randomNumberForClassifier) {
    try {
        if (gaDialog != null) {
            //Run GA then load the result maxMCCFeatures into applicationData->Dataset1Instances
            int positiveDataset1FromInt = applicationData.getPositiveDataset1FromField();
            int positiveDataset1ToInt = applicationData.getPositiveDataset1ToField();
            int negativeDataset1FromInt = applicationData.getNegativeDataset1FromField();
            int negativeDataset1ToInt = applicationData.getNegativeDataset1ToField();
            FastaFileManipulation gaFastaFile = new FastaFileManipulation(
                    applicationData.getPositiveStep1TableModel(), applicationData.getNegativeStep1TableModel(),
                    positiveDataset1FromInt, positiveDataset1ToInt, negativeDataset1FromInt,
                    negativeDataset1ToInt, applicationData.getWorkingDirectory());
            FastaFormat gaSequence;
            List<FastaFormat> posFastaList = new ArrayList<FastaFormat>();
            List<FastaFormat> negFastaList = new ArrayList<FastaFormat>();
            while ((gaSequence = gaFastaFile.nextSequence("pos")) != null) {
                posFastaList.add(gaSequence);
            }
            while ((gaSequence = gaFastaFile.nextSequence("neg")) != null) {
                negFastaList.add(gaSequence);
            }
            applicationData.setDataset1Instances(
                    runDAandLoadResult(applicationData, gaDialog, posFastaList, negFastaList));
        }

        StatusPane statusPane = applicationData.getStatusPane();
        long totalTimeStart = System.currentTimeMillis();
        //Setting up training data set 1 for classifier one
        if (statusPane != null) {
            statusPane.setText("Setting up...");
        }
        //Work on a copy of Dataset 1; the class attribute is the last attribute
        Instances inst = new Instances(applicationData.getDataset1Instances());
        inst.setClassIndex(applicationData.getDataset1Instances().numAttributes() - 1);
        applicationData.getDataset1Instances()
                .setClassIndex(applicationData.getDataset1Instances().numAttributes() - 1);
        Classifier classifierOne = Classifier.forName(classifierName, classifierOptions);
        if (statusPane != null) {
            statusPane.setText("Training Classifier One... May take a while... Please wait...");
        }
        //Time only the training itself
        long trainTimeStart = System.currentTimeMillis();
        inst.deleteAttributeType(Attribute.STRING);
        classifierOne.buildClassifier(inst);
        long trainTimeElapsed = System.currentTimeMillis() - trainTimeStart;

        if (classifierResults != null) {
            classifierResults.updateList(classifierResults.getClassifierList(), "Classifier: ", classifierName);
            classifierResults.updateList(classifierResults.getClassifierList(), "Training Data: ",
                    applicationData.getWorkingDirectory() + File.separator + "Dataset1.arff");
            classifierResults.updateList(classifierResults.getClassifierList(), "Time Used: ",
                    Utils.doubleToString(trainTimeElapsed / 1000.0, 2) + " seconds");
        }
        if (!test) {
            //If Need Not Test option is selected
            if (statusPane != null) {
                statusPane.setText("Done!");
            }
            return classifierOne;
        }
        if (applicationData.terminateThread) {
            //If Stop button is pressed
            if (statusPane != null) {
                statusPane.setText("Interrupted - Classifier One Training Completed");
            }
            return classifierOne;
        }
        //Running classifier one on dataset3
        if (statusPane != null) {
            statusPane.setText("Running ClassifierOne on Dataset 3..");
        }
        int positiveDataset3FromInt = applicationData.getPositiveDataset3FromField();
        int positiveDataset3ToInt = applicationData.getPositiveDataset3ToField();
        int negativeDataset3FromInt = applicationData.getNegativeDataset3FromField();
        int negativeDataset3ToInt = applicationData.getNegativeDataset3ToField();

        String classifierOneFilename = applicationData.getWorkingDirectory() + File.separator + "ClassifierOne_"
                + randomNumberForClassifier + ".scores";
        BufferedWriter dataset3OutputFile = new BufferedWriter(new FileWriter(classifierOneFilename));
        FastaFileManipulation fastaFile;
        // Everything that writes to the scores file runs inside try/finally so the writer is
        // closed on normal completion, on interruption and on any exception or error alike.
        try {
            fastaFile = new FastaFileManipulation(
                    applicationData.getPositiveStep1TableModel(), applicationData.getNegativeStep1TableModel(),
                    positiveDataset3FromInt, positiveDataset3ToInt, negativeDataset3FromInt,
                    negativeDataset3ToInt, applicationData.getWorkingDirectory());

            //One Feature per attribute; -1 because the class attribute must be ignored
            int featureCount = inst.numAttributes() - 1;
            ArrayList<Feature> featureDataArrayList = new ArrayList<Feature>();
            for (int attIndex = 0; attIndex < featureCount; attIndex++) {
                featureDataArrayList.add(Feature.levelOneClassifierPane(inst.attribute(attIndex).name()));
            }

            //Positives are read first; the label flips to "neg" once all of them are consumed
            int lineCounter = 0;
            String classLabel = "pos";
            int totalDataset3PositiveInstances = positiveDataset3ToInt - positiveDataset3FromInt + 1;
            FastaFormat fastaFormat;
            while ((fastaFormat = fastaFile.nextSequence(classLabel)) != null) {
                if (applicationData.terminateThread) {
                    if (statusPane != null) {
                        statusPane.setText("Interrupted - Classifier One Training Completed");
                    }
                    return classifierOne;
                }
                dataset3OutputFile.write(fastaFormat.getHeader());
                dataset3OutputFile.newLine();
                dataset3OutputFile.write(fastaFormat.getSequence());
                dataset3OutputFile.newLine();
                lineCounter++;//Incrementing here means lineCounter == x <=> this is sequence x
                dataset3OutputFile.flush();
                if (statusPane != null) {
                    statusPane.setText("Running Classifier One on Dataset 3.. @ " + lineCounter + " / "
                            + applicationData.getTotalSequences(3) + " Sequences");
                }
                Instance tempInst = new Instance(inst.numAttributes());
                tempInst.setDataset(inst);
                for (int attIndex = 0; attIndex < featureCount; attIndex++) {
                    //Give the sequence and the feature to get the feature value on the sequence
                    Object obj = GenerateArff.getMatchCount(fastaFormat, featureDataArrayList.get(attIndex),
                            applicationData.getScoringMatrixIndex(), applicationData.getCountingStyleIndex(),
                            applicationData.getScoringMatrix());
                    if (obj instanceof Integer) {
                        tempInst.setValue(attIndex, (Integer) obj);
                    } else if (obj instanceof Double) {
                        tempInst.setValue(attIndex, (Double) obj);
                    } else if (obj instanceof String) {
                        tempInst.setValue(attIndex, (String) obj);
                    } else {
                        throw new Error("Unknown: " + obj.getClass().getName());
                    }
                }
                tempInst.setValue(inst.numAttributes() - 1, classLabel);
                double[] results = classifierOne.distributionForInstance(tempInst);
                dataset3OutputFile.write(classLabel + ",0=" + results[0]);
                dataset3OutputFile.newLine();
                dataset3OutputFile.flush();
                if (lineCounter == totalDataset3PositiveInstances) {
                    classLabel = "neg";
                }
            }
        } finally {
            dataset3OutputFile.close();
        }

        //Display Statistics by reading the scores file written above
        PredictionStats classifierOneStatsOnBlindTest = new PredictionStats(classifierOneFilename, range,
                threshold);
        long totalTimeElapsed = System.currentTimeMillis() - totalTimeStart;
        if (classifierResults != null) {
            //Integer division on purpose: whole minutes, then the remaining seconds
            classifierResults.updateList(classifierResults.getResultsList(), "Total Time Used: ",
                    Utils.doubleToString(totalTimeElapsed / 60000, 2) + " minutes "
                            + Utils.doubleToString((totalTimeElapsed / 1000.0) % 60.0, 2) + " seconds");
        }
        classifierOneStatsOnBlindTest.updateDisplay(classifierResults, classifierOneDisplayTextArea, true);
        applicationData.setClassifierOneStats(classifierOneStatsOnBlindTest);
        if (myGraph != null) {
            myGraph.setMyStats(classifierOneStatsOnBlindTest);
        }
        if (statusPane != null) {
            statusPane.setText("Done!");
        }
        fastaFile.cleanUp();
        if (returnClassifier) {
            return classifierOne;
        }
        return classifierOneStatsOnBlindTest;
    } catch (Exception ex) {
        ex.printStackTrace();
        JOptionPane.showMessageDialog(parent, ex.getMessage(), "Evaluate classifier",
                JOptionPane.ERROR_MESSAGE);
        return null;
    }
}

From source file:sirius.trainer.step4.RunClassifierWithNoLocationIndex.java

License:Open Source License

/**
 * Runs jack-knife (leave-one-out) validation of classifier one over Dataset 1.
 *
 * <p>Dataset 1 is split into its "pos" and "neg" instances with STRING attributes removed. For
 * each instance in turn, that instance is held out, a training set is assembled from the smaller
 * class plus a random sample of the larger class, the classifier is rebuilt and the held-out
 * instance is scored. Header, sequence and score of every held-out instance are written to
 * {@code ClassifierOne_<randomNumberForClassifier>.scores} in the working directory, from which
 * the prediction statistics are then computed and stored in {@code applicationData}.
 *
 * @param parent parent frame of the error dialog shown when an exception is caught
 * @param applicationData source of Dataset 1, the FASTA table models, working directory, status
 *        pane and the {@code terminateThread} flag
 * @param classifierOneDisplayTextArea text area handed to {@code PredictionStats.updateDisplay}
 * @param m_ClassifierEditor supplies the classifier when non-null; otherwise
 *        {@code classifierName}/{@code classifierOptions} are used
 * @param ratio size of the larger-class sample as a multiple of the smaller class; {@code -1}
 *        means use the whole larger class
 * @param myGraph graph pane that receives the statistics; may be {@code null}
 * @param classifierResults result lists to update; may be {@code null}
 * @param range passed through to {@code PredictionStats}
 * @param threshold passed through to {@code PredictionStats}
 * @param outputClassifier when {@code true}, the returned classifier is built on the full dataset
 * @param classifierName class name handed to {@code Classifier.forName} when no editor is given
 * @param classifierOptions options handed to {@code Classifier.forName} when no editor is given
 * @param returnClassifier after a completed run, {@code true} returns the classifier and
 *        {@code false} returns the statistics
 * @param randomNumberForClassifier suffix used in the scores file name
 * @return the classifier when the run is interrupted through
 *         {@code applicationData.terminateThread} or when {@code returnClassifier} is
 *         {@code true}; otherwise the {@code PredictionStats} of the validation; {@code null}
 *         when an exception was caught (an error dialog is shown first)
 */
public static Object jackKnifeClassifierOneWithNoLocationIndex(JInternalFrame parent,
        ApplicationData applicationData, JTextArea classifierOneDisplayTextArea,
        GenericObjectEditor m_ClassifierEditor, double ratio, GraphPane myGraph,
        ClassifierResults classifierResults, int range, double threshold, boolean outputClassifier,
        String classifierName, String[] classifierOptions, boolean returnClassifier,
        int randomNumberForClassifier) {
    try {
        StatusPane statusPane = applicationData.getStatusPane();

        long totalTimeStart = System.currentTimeMillis();
        Classifier tempClassifier;
        if (m_ClassifierEditor != null) {
            tempClassifier = (Classifier) m_ClassifierEditor.getValue();
        } else {
            tempClassifier = Classifier.forName(classifierName, classifierOptions);
        }

        //Assume that class attribute is the last attribute - This should be the case for all Sirius produced Arff files
        //split the instances into positive and negative
        Instances posInst = instancesOfClass(applicationData.getDataset1Instances(), "pos");
        Instances negInst = instancesOfClass(applicationData.getDataset1Instances(), "neg");

        //Train classifier one with the full dataset first then do validation to gauge its accuracy
        if (statusPane != null) {
            statusPane.setText("Training Classifier One... May take a while... Please wait...");
        }
        long trainTimeStart = System.currentTimeMillis();
        Instances fullInst = new Instances(applicationData.getDataset1Instances());
        fullInst.setClassIndex(fullInst.numAttributes() - 1);
        // Use the same attribute set as the per-fold training data (posInst/negInst), which have
        // their STRING attributes removed.
        fullInst.deleteAttributeType(Attribute.STRING);
        Classifier classifierOne;
        if (m_ClassifierEditor != null) {
            classifierOne = (Classifier) m_ClassifierEditor.getValue();
        } else {
            classifierOne = Classifier.forName(classifierName, classifierOptions);
        }
        if (outputClassifier) {
            classifierOne.buildClassifier(fullInst);
        }
        long trainTimeElapsed = System.currentTimeMillis() - trainTimeStart;

        String tclassifierName;
        if (m_ClassifierEditor != null) {
            tclassifierName = m_ClassifierEditor.getValue().getClass().getName();
        } else {
            tclassifierName = classifierName;
        }
        if (classifierResults != null) {
            classifierResults.updateList(classifierResults.getClassifierList(), "Classifier: ",
                    tclassifierName);
            classifierResults.updateList(classifierResults.getClassifierList(), "Training Data: ",
                    " Jack Knife Validation");
            classifierResults.updateList(classifierResults.getClassifierList(), "Time Used: ",
                    Utils.doubleToString(trainTimeElapsed / 1000.0, 2) + " seconds");
        }
        String classifierOneFilename = applicationData.getWorkingDirectory() + File.separator + "ClassifierOne_"
                + randomNumberForClassifier + ".scores";
        BufferedWriter outputCrossValidation = new BufferedWriter(new FileWriter(classifierOneFilename));
        // The writer is closed in the finally block on every exit path: normal completion,
        // interruption, and any exception or error raised while validating.
        try {
            int positiveDataset1FromInt = applicationData.getPositiveDataset1FromField();
            int positiveDataset1ToInt = applicationData.getPositiveDataset1ToField();
            int negativeDataset1FromInt = applicationData.getNegativeDataset1FromField();
            int negativeDataset1ToInt = applicationData.getNegativeDataset1ToField();
            Step1TableModel positiveStep1TableModel = applicationData.getPositiveStep1TableModel();
            Step1TableModel negativeStep1TableModel = applicationData.getNegativeStep1TableModel();
            FastaFileManipulation fastaFile = new FastaFileManipulation(positiveStep1TableModel,
                    negativeStep1TableModel, positiveDataset1FromInt, positiveDataset1ToInt,
                    negativeDataset1FromInt, negativeDataset1ToInt, applicationData.getWorkingDirectory());

            //Headers and sequences in instance order: all positives first, then all negatives
            String[] header = new String[fullInst.numInstances()];
            String[] data = new String[fullInst.numInstances()];
            try {
                FastaFormat fastaFormat;
                int counter = 0;
                while ((fastaFormat = fastaFile.nextSequence("pos")) != null) {
                    header[counter] = fastaFormat.getHeader();
                    data[counter] = fastaFormat.getSequence();
                    counter++;
                }
                while ((fastaFormat = fastaFile.nextSequence("neg")) != null) {
                    header[counter] = fastaFormat.getHeader();
                    data[counter] = fastaFormat.getSequence();
                    counter++;
                }
            } finally {
                // The sequences are held in memory from here on; call cleanUp() as the sibling
                // routines do once they have finished with their FastaFileManipulation.
                fastaFile.cleanUp();
            }

            //run jack knife validation
            int positives = posInst.numInstances();
            for (int x = 0; x < fullInst.numInstances(); x++) {
                if (applicationData.terminateThread) {
                    if (statusPane != null) {
                        statusPane.setText("Interrupted - Classifier One Training Completed");
                    }
                    return classifierOne;
                }
                if (statusPane != null) {
                    statusPane.setText("Running " + (x + 1) + " / " + fullInst.numInstances());
                }
                Instances trainPosInst = new Instances(posInst);
                Instances trainNegInst = new Instances(negInst);
                //Hold out instance x: indices below 'positives' are positives, the rest negatives
                Instance testInst;
                if (x < positives) {
                    testInst = posInst.instance(x);
                    trainPosInst.delete(x);
                } else {
                    testInst = negInst.instance(x - positives);
                    trainNegInst.delete(x - positives);
                }

                //Training set = whole smaller class + random sample of the larger class
                Instances smaller;
                Instances larger;
                if (trainPosInst.numInstances() < trainNegInst.numInstances()) {
                    smaller = trainPosInst;
                    larger = trainNegInst;
                } else {
                    smaller = trainNegInst;
                    larger = trainPosInst;
                }
                Instances trainInstances = new Instances(smaller);
                int max = (ratio == -1) ? larger.numInstances() : (int) (ratio * smaller.numInstances());
                moveRandomSample(larger, trainInstances, max, new Random(1));

                Classifier foldClassifier = tempClassifier;
                foldClassifier.buildClassifier(trainInstances);
                double[] results = foldClassifier.distributionForInstance(testInst);
                String classValue = testInst.toString(testInst.classIndex());
                outputCrossValidation.write(header[x]);
                outputCrossValidation.newLine();
                outputCrossValidation.write(data[x]);
                outputCrossValidation.newLine();
                if (!classValue.equals("pos") && !classValue.equals("neg")) {
                    throw new Error("Invalid Class Type!");
                }
                outputCrossValidation.write(classValue + ",0=" + results[0]);
                outputCrossValidation.newLine();
                outputCrossValidation.flush();
            }
        } finally {
            outputCrossValidation.close();
        }

        PredictionStats classifierOneStatsOnJackKnife = new PredictionStats(classifierOneFilename, range,
                threshold);
        long totalTimeElapsed = System.currentTimeMillis() - totalTimeStart;
        if (classifierResults != null) {
            //Integer division on purpose: whole minutes, then the remaining seconds
            classifierResults.updateList(classifierResults.getResultsList(), "Total Time Used: ",
                    Utils.doubleToString(totalTimeElapsed / 60000, 2) + " minutes "
                            + Utils.doubleToString((totalTimeElapsed / 1000.0) % 60.0, 2) + " seconds");
        }

        classifierOneStatsOnJackKnife.updateDisplay(classifierResults, classifierOneDisplayTextArea, true);
        applicationData.setClassifierOneStats(classifierOneStatsOnJackKnife);
        if (myGraph != null) {
            myGraph.setMyStats(classifierOneStatsOnJackKnife);
        }

        if (statusPane != null) {
            statusPane.setText("Done!");
        }
        if (returnClassifier) {
            return classifierOne;
        }
        return classifierOneStatsOnJackKnife;
    } catch (Exception e) {
        e.printStackTrace();
        JOptionPane.showMessageDialog(parent, e.getMessage(), "ERROR", JOptionPane.ERROR_MESSAGE);
        return null;
    }
}

/**
 * Returns a copy of {@code source} holding only the instances whose last attribute (the class)
 * equals {@code label} ignoring case, with all STRING attributes removed.
 */
private static Instances instancesOfClass(Instances source, String label) {
    Instances subset = new Instances(source);
    int classIndex = subset.numAttributes() - 1;
    subset.setClassIndex(classIndex);
    //No increment on delete: the next instance slides into position i
    for (int i = 0; i < subset.numInstances();) {
        if (subset.instance(i).stringValue(classIndex).equalsIgnoreCase(label)) {
            i++;
        } else {
            subset.delete(i);
        }
    }
    subset.deleteAttributeType(Attribute.STRING);
    return subset;
}

/**
 * Moves up to {@code max} randomly chosen instances from {@code pool} into {@code target}
 * (sampling without replacement). A non-positive {@code max} moves nothing.
 */
private static void moveRandomSample(Instances pool, Instances target, int max, Random rand) {
    // Fix the number of draws before the loop: the pool shrinks with every draw, so testing the
    // counter against the live pool size would stop after roughly half the requested draws.
    int draws = Math.min(max, pool.numInstances());
    for (int drawn = 0; drawn < draws; drawn++) {
        int index = rand.nextInt(pool.numInstances());
        target.add(pool.instance(index));
        pool.delete(index);
    }
}