Example usage for edu.stanford.nlp.classify ColumnDataClassifier ColumnDataClassifier

List of usage examples for the edu.stanford.nlp.classify.ColumnDataClassifier constructor

Introduction

On this page you can find example usages of the edu.stanford.nlp.classify.ColumnDataClassifier constructor.

Prototype

public ColumnDataClassifier(Pair<Flags[], Classifier<String, String>> flagsClassifierPair) 

Source Link

Document

Construct a ColumnDataClassifier.

Usage

From source file:my.demo.DemoUI.java

License:Open Source License

/**
 * Identifies gene and protein mentions in the abstract at {@code Variables.filePathTxt}
 * using the Stanford {@code ColumnDataClassifier} and publishes them as brat annotations.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>Write every whitespace-delimited token of the abstract to
 *       {@code bratPath + fileName + ".test"} as a {@code "-\t<token>"} line.</li>
 *   <li>Train a classifier from {@code Variables.filePathProp} and
 *       {@code Variables.filePathTrain}, then classify each line of the {@code .test} file.</li>
 *   <li>For every token whose class is not {@code "Others"}, append an annotation line to
 *       {@code bratPath + fileName + ".ann"} and record its gene/protein scores and text in
 *       {@code Variables}; other tokens only increment {@code Variables.otherCount}.</li>
 *   <li>Copy the abstract to {@code bratPath + fileName + ".txt"} and open
 *       {@code Variables.bratURL + fileName} in the desktop browser.</li>
 * </ol>
 *
 * <p>A missing abstract file is reported in a dialog and the method returns without doing
 * any further work. Other I/O or headless-environment failures are reported in a generic
 * dialog; a malformed brat URL is logged at {@code SEVERE}.
 */
private void runMyClassifier() {
    try {
        int count = 1;
        int position = 0;

        Scanner abstractScanner;
        try {
            abstractScanner = new Scanner(new File(Variables.filePathTxt));
            logIt("Reading The abstract...");
        } catch (FileNotFoundException e1) {
            JOptionPane.showMessageDialog(null, "ERROR!!!File Not Found!!");
            // Without the abstract there is nothing to classify; carrying on would
            // dereference a scanner that was never created.
            return;
        }

        // try-with-resources guarantees the scanner and the writer are released even if
        // writing the token file fails part-way through.
        try (Scanner tokens = abstractScanner) {
            File testFile = new File(Variables.bratPath + Variables.fileName + ".test");

            logIt("Processing The abstract...");

            // NOTE(review): FileWriter uses the platform default charset while the file is
            // read back below as "utf-8" — confirm the two agree on the target machines.
            try (BufferedWriter testOut = new BufferedWriter(new FileWriter(testFile.getAbsoluteFile()))) {
                while (tokens.hasNext()) {
                    // One token per line; the leading "-" fills the first column of each row.
                    testOut.write("-" + "\t" + tokens.next() + "\n");
                }
            }
        }

        File annFile = new File(Variables.bratPath + Variables.fileName + ".ann");
        logIt("Running the Classifier...");

        try (BufferedWriter annOut = new BufferedWriter(new FileWriter(annFile.getAbsoluteFile()))) {
            ColumnDataClassifier cdc = new ColumnDataClassifier(Variables.filePathProp);
            Classifier<String, String> cl = cdc.makeClassifier(cdc.readTrainingExamples(Variables.filePathTrain));
            for (String line : ObjectBank.getLineIterator(Variables.bratPath + Variables.fileName + ".test",
                    "utf-8")) {

                Datum<String, String> d = cdc.makeDatumFromLine(line);
                // Classify once per line and reuse the result everywhere below.
                String label = cl.classOf(d);
                // Everything after the first tab is the token text.
                String token = line.substring(line.indexOf("\t") + 1);

                System.out.println(line + "  ==>  " + label + "--");
                if (!label.equals("Others")) {
                    String myclass = "";
                    switch (label) {
                    case "gene":
                        myclass = "Gene-Level";
                        Variables.geneCount++;
                        break;
                    case "protein":
                        myclass = "Protein-Level";
                        Variables.proteinCount++;
                        break;
                    }
                    //data for visualisation
                    Variables.geneScores.add(cl.scoresOf(d).getCount("gene"));
                    Variables.proteinScores.add(cl.scoresOf(d).getCount("protein"));
                    Variables.keyWords.add(token);

                    logIt("Creating Annotations for Brat...");
                    // Annotation line: T<id> TAB <type> <start> <end> TAB <text>
                    annOut.write("T" + (count++) + "\t" + myclass + " " + position + " "
                            + (position + token.length()) + "\t"
                            + token + "\n");

                } else {
                    Variables.otherCount++;
                }

                // NOTE(review): offsets advance by token length plus one, i.e. they assume
                // exactly one separator character between tokens in the .txt file — verify
                // against abstracts containing line breaks or repeated spaces.
                position += token.length() + 1;
            }
        }

        System.out.println(Variables.geneScores);

        //Copying .txt file to brat folder
        File source = new File(Variables.filePathTxt);
        File dest = new File(Variables.bratPath + Variables.fileName + ".txt");
        copyFileUsingStream(source, dest);

        logIt("Opening Browser...");

        //Open the annotations in the brat server
        Desktop desktop = Desktop.getDesktop();
        desktop.browse(new URI(Variables.bratURL + Variables.fileName));

    } catch (HeadlessException | IOException e) {
        JOptionPane.showMessageDialog(null, "ERROR!!!Some error has occured");

    } catch (URISyntaxException ex) {
        Logger.getLogger(DemoUI.class.getName()).log(Level.SEVERE, null, ex);
    }

}

From source file:my.demo.DemoUI.java

License:Open Source License

/**
 * Predicts which disease the keywords in {@code Variables.bratPath + Variables.fileNameTest2}
 * point to.
 *
 * <p>A classifier is trained from {@code Variables.filePathProp2} and either the single- or
 * multi-variate training file, depending on {@code Variables.multiVariate}. Each input line
 * is classified; when the predicted class is one of the four tracked diseases and its score
 * reaches the per-keyword threshold, that score is added to the disease's running total in
 * {@code Variables}. Afterwards, if the largest total reaches the outlier threshold, a dialog
 * is shown for every disease whose total equals that maximum; otherwise an "Outlier" dialog
 * is shown. A final dialog lists all four totals.
 */
private void step2Classify() {
    // Every run starts from zeroed totals.
    Variables.leukemiaCount = 0;
    Variables.gliomaCount = 0;
    Variables.breastCancerCount = 0;
    Variables.pancreaticCancerCount = 0;

    final ColumnDataClassifier cdc = new ColumnDataClassifier(Variables.filePathProp2);

    // The multivariate mode swaps both the training file and the two thresholds.
    final boolean multi = Variables.multiVariate;
    final Classifier<String, String> cl = cdc.makeClassifier(cdc.readTrainingExamples(
            multi ? Variables.filePathTrain2multi : Variables.filePathTrain2));
    final double threshold1 = multi ? Variables.thresholdIndividualmulti : Variables.thresholdIndividual;
    final double threshold2 = multi ? Variables.thresholdOutliermulti : Variables.thresholdOutlier;

    for (String line : ObjectBank.getLineIterator(Variables.bratPath + Variables.fileNameTest2, "utf-8")) {
        final Datum<String, String> d = cdc.makeDatumFromLine(line);
        final String label = cl.classOf(d);
        System.out.println(line + "  ==>  " + label + "==" + cl.scoresOf(d));

        final boolean tracked = label.equals("leukemia") || label.equals("breast-cancer")
                || label.equals("glioma") || label.equals("pancreatic-cancer");
        if (!tracked) {
            continue;
        }

        // Only the score of the predicted class matters, and only if it clears the
        // per-keyword threshold.
        final double score = cl.scoresOf(d).getCount(label);
        if (score >= threshold1) {
            switch (label) {
            case "leukemia":
                Variables.leukemiaCount += score;
                System.out.println("Adding");
                break;
            case "breast-cancer":
                Variables.breastCancerCount += score;
                break;
            case "glioma":
                Variables.gliomaCount += score;
                break;
            case "pancreatic-cancer":
                Variables.pancreaticCancerCount += score;
                break;
            }
        }
    }

    logIt("Calculating Scores for Disease Identification...");

    // Totals and their dialog texts, kept in the order the dialogs are shown.
    final double[] totals = { Variables.pancreaticCancerCount, Variables.gliomaCount,
            Variables.leukemiaCount, Variables.breastCancerCount };
    final String[] verdicts = { "PANCREATIC CANCER!!!", "GLIOMA!!!", "LEUKEMIA!!!", "BREAST CANCER!!!" };

    double max = totals[0];
    for (int i = 1; i < totals.length; i++) {
        max = Math.max(max, totals[i]);
    }

    if (max >= threshold2) {
        // Every disease tied for the top total gets its own dialog.
        for (int i = 0; i < totals.length; i++) {
            if (max == totals[i]) {
                JOptionPane.showMessageDialog(null, verdicts[i]);
            }
        }
    } else {
        // No total is high enough to name a disease with confidence.
        JOptionPane.showMessageDialog(null, "Outlier!!");
    }

    final String summary = "Scores!!\nPC:" + Variables.pancreaticCancerCount
            + "\nBC:" + Variables.breastCancerCount
            + "\nGlioma:" + Variables.gliomaCount
            + "\nLeukemia:" + Variables.leukemiaCount;
    JOptionPane.showMessageDialog(null, summary);
    logIt("Finish!!");
}