Example usage for edu.stanford.nlp.objectbank ObjectBank getLineIterator

List of usage examples for edu.stanford.nlp.objectbank ObjectBank getLineIterator

Introduction

On this page you can find example usages of edu.stanford.nlp.objectbank.ObjectBank.getLineIterator.

Prototype

public static ObjectBank<String> getLineIterator(Collection<?> filesStringsAndReaders, String encoding) 

Source Link

Usage

From source file:de.iisys.ocr.pos.CustomNERFeatureFactory.java

License:Open Source License

/**
 * Populates {@code lexicon} from the distributional-similarity file named by
 * {@code flags.distSimLexicon}. Does nothing when no file is configured or when the
 * lexicon has already been loaded.
 *
 * <p>The line layout is selected by {@code flags.distSimFileFormat}:
 * <ul>
 *   <li>{@code "terryKoo"}: tab-separated, class in the first field and word in the second;
 *       the class is cut to {@code flags.distSimMaxBits} characters when that limit is
 *       positive.</li>
 *   <li>anything else ("alexClark"): whitespace-separated, word first, class second.</li>
 * </ul>
 *
 * <p>Words are lower-cased unless {@code flags.casedDistSim} is set, and replaced by their
 * digit word shape when {@code flags.numberEquivalenceDistSim} is set.
 *
 * @param flags configuration supplying the lexicon path, encoding, format and
 *              normalisation switches
 */
private void initLexicon(SeqClassifierFlags flags) {
    // Skip when there is nothing to load or the work was already done.
    if (flags.distSimLexicon == null || lexicon != null) {
        return;
    }

    Timing.startDoing("Loading distsim lexicon from " + flags.distSimLexicon);
    lexicon = Generics.newHashMap();
    final boolean isTerryKooFormat = "terryKoo".equals(flags.distSimFileFormat);

    for (String entry : ObjectBank.getLineIterator(flags.distSimLexicon, flags.inputEncoding)) {
        String token;
        String cluster;
        if (!isTerryKooFormat) {
            // "alexClark": <word> <class>
            final String[] columns = entry.split("\\s+");
            token = columns[0];
            cluster = columns[1];
        } else {
            // "terryKoo": <class> TAB <word>
            final String[] columns = entry.split("\\t");
            token = columns[1];
            cluster = columns[0];
            final boolean exceedsLimit = flags.distSimMaxBits > 0
                    && cluster.length() > flags.distSimMaxBits;
            if (exceedsLimit) {
                cluster = cluster.substring(0, flags.distSimMaxBits);
            }
        }

        // Normalise the key: case folding first, then number equivalence.
        token = flags.casedDistSim ? token : token.toLowerCase();
        if (flags.numberEquivalenceDistSim) {
            token = WordShapeClassifier.wordShape(token, WordShapeClassifier.WORDSHAPEDIGITS);
        }

        lexicon.put(token, cluster);
    }

    Timing.endDoing();
}

From source file:my.demo.DemoUI.java

License:Open Source License

/**
 * Classifies every token of the abstract at {@code Variables.filePathTxt} and writes the
 * result as annotations for brat.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>Write each whitespace-delimited token of the abstract to
 *       {@code bratPath + fileName + ".test"} as a line of the form {@code -<TAB>token}.</li>
 *   <li>Train a classifier from {@code Variables.filePathTrain} and classify each line of the
 *       {@code .test} file.</li>
 *   <li>For every token whose class is not {@code "Others"}, record its gene/protein scores
 *       and keyword in {@code Variables} and append a {@code T<n>} line to
 *       {@code bratPath + fileName + ".ann"}.</li>
 *   <li>Copy the abstract next to the annotations as {@code .txt} and open
 *       {@code Variables.bratURL + Variables.fileName} in the desktop browser.</li>
 * </ol>
 *
 * <p>Failures are reported to the user through dialogs; nothing is thrown to the caller for
 * a missing abstract, an I/O failure or a malformed brat URL.
 */
private void runMyClassifier() {
    try {
        // Open the abstract before touching any output file. If it is missing, tell the
        // user and stop: continuing would dereference a null scanner further down.
        final Scanner abstractTokens;
        try {
            abstractTokens = new Scanner(new File(Variables.filePathTxt));
        } catch (FileNotFoundException e1) {
            JOptionPane.showMessageDialog(null, "ERROR!!!File Not Found!!");
            return;
        }
        logIt("Reading The abstract...");

        File testFile = new File(Variables.bratPath + Variables.fileName + ".test");
        logIt("Processing The abstract...");

        // One token per line in the column format the classifier reads back below.
        // try-with-resources closes both the scanner and the writer even on failure.
        // NOTE(review): FileWriter uses the platform default charset while the file is read
        // back as "utf-8" below - confirm the two agree for non-ASCII abstracts.
        try (Scanner tokens = abstractTokens;
                BufferedWriter testWriter = new BufferedWriter(
                        new FileWriter(testFile.getAbsoluteFile()))) {
            while (tokens.hasNext()) {
                testWriter.write("-" + "\t" + tokens.next() + "\n");
            }
        }

        File annFile = new File(Variables.bratPath + Variables.fileName + ".ann");
        logIt("Running the Classifier...");

        try (BufferedWriter annWriter = new BufferedWriter(
                new FileWriter(annFile.getAbsoluteFile()))) {
            int count = 1;    // running id of the next brat "T" annotation
            int position = 0; // start offset of the current token

            ColumnDataClassifier cdc = new ColumnDataClassifier(Variables.filePathProp);
            Classifier<String, String> cl =
                    cdc.makeClassifier(cdc.readTrainingExamples(Variables.filePathTrain));

            for (String line : ObjectBank.getLineIterator(
                    Variables.bratPath + Variables.fileName + ".test", "utf-8")) {

                Datum<String, String> d = cdc.makeDatumFromLine(line);
                String predicted = cl.classOf(d);
                // Everything after the first tab is the token itself.
                String keyword = line.substring(line.indexOf("\t") + 1);

                System.out.println(line + "  ==>  " + predicted + "--");

                if (!predicted.equals("Others")) {
                    String myclass = "";
                    switch (predicted) {
                    case "gene":
                        myclass = "Gene-Level";
                        Variables.geneCount++;
                        break;
                    case "protein":
                        myclass = "Protein-Level";
                        Variables.proteinCount++;
                        break;
                    }
                    // Data for visualisation.
                    Variables.geneScores.add(cl.scoresOf(d).getCount("gene"));
                    Variables.proteinScores.add(cl.scoresOf(d).getCount("protein"));
                    Variables.keyWords.add(keyword);

                    logIt("Creating Annotations for Brat...");
                    annWriter.write("T" + (count++) + "\t" + myclass + " " + position + " "
                            + (position + keyword.length()) + "\t" + keyword + "\n");
                } else {
                    Variables.otherCount++;
                }

                // NOTE(review): advancing by length + 1 assumes consecutive tokens in the
                // abstract are separated by exactly one character - confirm against the
                // input, since the scanner splits on any run of whitespace.
                position += keyword.length() + 1;
            }
        }

        System.out.println(Variables.geneScores);

        // Copy the abstract into the brat folder alongside the annotations.
        File source = new File(Variables.filePathTxt);
        File dest = new File(Variables.bratPath + Variables.fileName + ".txt");
        copyFileUsingStream(source, dest);

        logIt("Opening Browser...");

        // Open the annotations in the brat server.
        Desktop desktop = Desktop.getDesktop();
        desktop.browse(new URI(Variables.bratURL + Variables.fileName));

    } catch (HeadlessException | IOException e) {
        // Keep the cause in the log; the dialog alone gives no way to diagnose the failure.
        Logger.getLogger(DemoUI.class.getName()).log(Level.SEVERE, null, e);
        JOptionPane.showMessageDialog(null, "ERROR!!!Some error has occured");

    } catch (URISyntaxException ex) {
        Logger.getLogger(DemoUI.class.getName()).log(Level.SEVERE, null, ex);
    }

}

From source file:my.demo.DemoUI.java

License:Open Source License

/**
 * Predicts which disease the keyword lines in
 * {@code Variables.bratPath + Variables.fileNameTest2} point to.
 *
 * <p>Each line is classified; when the score of its predicted class reaches the
 * per-keyword threshold, that score is added to the running total of the class
 * (leukemia, breast-cancer, glioma or pancreatic-cancer). Afterwards the largest total is
 * compared with the outlier threshold: if it reaches it, a dialog is shown for every
 * class whose total equals the maximum, otherwise an "Outlier" dialog is shown. A final
 * dialog lists all four totals.
 *
 * <p>Training data and both thresholds are taken from the multivariate settings when
 * {@code Variables.multiVariate} is set, and from the individual settings otherwise.
 */
private void step2Classify() {
    // Start every run from zeroed totals.
    Variables.leukemiaCount = 0;
    Variables.gliomaCount = 0;
    Variables.breastCancerCount = 0;
    Variables.pancreaticCancerCount = 0;

    ColumnDataClassifier cdc = new ColumnDataClassifier(Variables.filePathProp2);
    final boolean multi = Variables.multiVariate;

    Classifier<String, String> cl;
    if (multi) {
        cl = cdc.makeClassifier(cdc.readTrainingExamples(Variables.filePathTrain2multi));
    } else {
        cl = cdc.makeClassifier(cdc.readTrainingExamples(Variables.filePathTrain2));
    }

    // threshold1: minimum score for a single keyword to count towards its class.
    // threshold2: minimum winning total below which the result is reported as an outlier.
    final double threshold1 = multi ? Variables.thresholdIndividualmulti : Variables.thresholdIndividual;
    final double threshold2 = multi ? Variables.thresholdOutliermulti : Variables.thresholdOutlier;

    for (String line : ObjectBank.getLineIterator(Variables.bratPath + Variables.fileNameTest2, "utf-8")) {
        Datum<String, String> d = cdc.makeDatumFromLine(line);
        final String predicted = cl.classOf(d);
        System.out.println(line + "  ==>  " + predicted + "==" + cl.scoresOf(d));

        // Only the predicted class can receive this keyword's score, and only when the
        // score reaches threshold1.
        switch (predicted) {
        case "leukemia": {
            final double score = cl.scoresOf(d).getCount("leukemia");
            if (score >= threshold1) {
                Variables.leukemiaCount += score;
                System.out.println("Adding");
            }
            break;
        }
        case "breast-cancer": {
            final double score = cl.scoresOf(d).getCount("breast-cancer");
            if (score >= threshold1) {
                Variables.breastCancerCount += score;
            }
            break;
        }
        case "glioma": {
            final double score = cl.scoresOf(d).getCount("glioma");
            if (score >= threshold1) {
                Variables.gliomaCount += score;
            }
            break;
        }
        case "pancreatic-cancer": {
            final double score = cl.scoresOf(d).getCount("pancreatic-cancer");
            if (score >= threshold1) {
                Variables.pancreaticCancerCount += score;
            }
            break;
        }
        }
    }

    logIt("Calculating Scores for Disease Identification...");

    // The largest of the four totals decides the outcome.
    final double firstPairMax = Math.max(Variables.gliomaCount, Variables.pancreaticCancerCount);
    final double secondPairMax = Math.max(Variables.leukemiaCount, Variables.breastCancerCount);
    final double max = Math.max(firstPairMax, secondPairMax);

    final boolean reachesOutlierThreshold = max >= threshold2;
    if (!reachesOutlierThreshold) {
        // No class accumulated enough evidence to be reported.
        JOptionPane.showMessageDialog(null, "Outlier!!");
    } else {
        // Every class tied for the maximum gets its own dialog.
        if (max == Variables.pancreaticCancerCount) {
            JOptionPane.showMessageDialog(null, "PANCREATIC CANCER!!!");
        }
        if (max == Variables.gliomaCount) {
            JOptionPane.showMessageDialog(null, "GLIOMA!!!");
        }
        if (max == Variables.leukemiaCount) {
            JOptionPane.showMessageDialog(null, "LEUKEMIA!!!");
        }
        if (max == Variables.breastCancerCount) {
            JOptionPane.showMessageDialog(null, "BREAST CANCER!!!");
        }
    }

    final String summary = "Scores!!\nPC:" + Variables.pancreaticCancerCount
            + "\nBC:" + Variables.breastCancerCount
            + "\nGlioma:" + Variables.gliomaCount
            + "\nLeukemia:" + Variables.leukemiaCount;
    JOptionPane.showMessageDialog(null, summary);
    logIt("Finish!!");
}