ubic.pubmedgate.interactions.evaluation.CreateInteractionSpreadsheet.java Source code

Java tutorial

Introduction

Here is the source code for ubic.pubmedgate.interactions.evaluation.CreateInteractionSpreadsheet.java

Source

/*
 * The WhiteText project
 * 
 * Copyright (c) 2012 University of British Columbia
 * 
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *       http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */

package ubic.pubmedgate.interactions.evaluation;

import java.io.File;
import java.util.Collection;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.apache.poi.hssf.usermodel.HSSFCell;
import org.apache.poi.hssf.usermodel.HSSFFont;
import org.apache.poi.hssf.usermodel.HSSFRichTextString;
import org.apache.poi.hssf.usermodel.HSSFRow;

import ubic.basecode.io.excel.CreateSpreadSheet;
import ubic.basecode.io.excel.ExcelUtil;
import ubic.pubmedgate.Config;
import ubic.pubmedgate.GateInterface;
import ubic.pubmedgate.interactions.AirolaXMLReader;
import ubic.pubmedgate.interactions.SLOutputReader;

/**
 * Writes an Excel sheet with one row per interaction pair. Each row holds the sentence (as rich text), the PMID,
 * the pair id, the text of both partners, the classifier score and prediction, the resolved region names when the
 * pair was normalized, the result of the earlier curation round and a PubMed hyperlink.
 */
public class CreateInteractionSpreadsheet extends CreateSpreadSheet {
    /**
     * Number of data rows written to one sheet before the output rolls over to an overflow file. Presumably chosen
     * to stay below the row limit of the .xls format - TODO confirm.
     */
    private static final int MAX_ROWS_PER_SHEET = 65000;

    AirolaXMLReader XMLReader;
    SLOutputReader SLReader;
    GateInterface p2g;

    /**
     * Creates the spreadsheet using an {@link InteractionSchema} and logs the sizes of both readers.
     *
     * @param filename path of the spreadsheet file, handed to the superclass
     * @param SLReader source of scores and predictions
     * @param XMLReader source of sentences, PMIDs and partner texts
     * @throws Exception propagated from the superclass constructor
     */
    public CreateInteractionSpreadsheet(String filename, SLOutputReader SLReader, AirolaXMLReader XMLReader)
            throws Exception {
        super(filename, new InteractionSchema());

        this.SLReader = SLReader;
        this.XMLReader = XMLReader;

        log.info("SLReader size:" + SLReader.getAll().size());
        log.info("XMLReader size:" + XMLReader.getPairCount());

    }

    /**
     * Fills the sheet from plain pair ids. Every id is wrapped in a {@link NormalizedConnection} that only has its
     * {@code pairID} set, so no resolved-region columns are written for these rows.
     *
     * @param pairs pair ids, written in iteration order
     * @throws Exception propagated from {@link #populate(Collection)}
     */
    public void populate(List<String> pairs) throws Exception {
        List<NormalizedConnection> pairsConverted = new LinkedList<NormalizedConnection>();
        for (String pair : pairs) {
            NormalizedConnection nc = new NormalizedConnection();
            nc.pairID = pair;
            pairsConverted.add(nc);
        }
        populate(pairsConverted);
    }

    /**
     * Fills the sheet with one row per connection, starting at row 1. When more than {@link #MAX_ROWS_PER_SHEET}
     * rows have been written, the current sheet is saved, {@code "overflow.xls"} is appended to the filename and
     * writing continues at row 1 of a freshly initialised sheet.
     *
     * @param pairsNormalized connections to write; {@code regionA}/{@code regionB} may be null when the pair was not
     *        normalized
     * @throws Exception propagated from the readers or from saving/initialising the sheet
     */
    public void populate(Collection<NormalizedConnection> pairsNormalized) throws Exception {
        int row = 1;

        HSSFFont textFont = createSentenceFont();

        Map<String, String> pairToPMID = XMLReader.getPairIDToPMID();
        Map<String, Double> scores = SLReader.getScores();

        // Copied into hash sets once so the per-row membership tests do not scan a list each time.
        Set<String> positivePredictions = new HashSet<String>(SLReader.getPositivePredictions());

        LoadInteractionSpreadsheet allSpreadSheet = AllCuratorsCombined.getFinal2000Results();
        Set<String> previousAccepts = allSpreadSheet.getAcceptedPairs();
        Set<String> previousEvaluated = new HashSet<String>(allSpreadSheet.getAllPairs());

        for (NormalizedConnection nc : pairsNormalized) {
            String pair = nc.pairID;
            if (row > MAX_ROWS_PER_SHEET) {
                save();
                // Rename BEFORE init(): if init() acts on the file named by 'filename' (for example by clearing a
                // pre-existing file - TODO confirm against CreateSpreadSheet), it must not touch the sheet that was
                // just saved.
                filename += "overflow.xls";
                init();
                // Fonts are owned by the workbook that created them; if init() replaced the workbook the old font
                // is no longer valid for new cells, so build a new one from the current workbook.
                textFont = createSentenceFont();
                row = 1;
            }

            HSSFRow r = spreadsheet.getRow(row);
            if (r == null) {
                r = spreadsheet.createRow(row);
            }
            int col = schema.getPosition("sentence");
            HSSFCell c = r.createCell(col);
            c.setCellType(HSSFCell.CELL_TYPE_STRING);

            HSSFRichTextString richString = XMLReader.getRichStringSentence(pair, textFont);
            c.setCellValue(richString);

            String pmid = pairToPMID.get(pair);

            ExcelUtil.setValue(spreadsheet, row, schema.getPosition("PMID"), pmid);
            ExcelUtil.setValue(spreadsheet, row, schema.getPosition("PairID"), pair);

            String partnerAText = XMLReader.getPartnerAText(pair);
            String partnerBText = XMLReader.getPartnerBText(pair);

            ExcelUtil.setValue(spreadsheet, row, schema.getPosition("RegionAName"), partnerAText);
            ExcelUtil.setValue(spreadsheet, row, schema.getPosition("RegionBName"), partnerBText);

            ExcelUtil.setValue(spreadsheet, row, schema.getPosition("Score"), scores.get(pair));
            ExcelUtil.setValue(spreadsheet, row, schema.getPosition("Prediction"),
                    String.valueOf(positivePredictions.contains(pair)));

            // Resolved names exist only when the pair was normalized; an exact (case-insensitive) match with the
            // mention text is pre-marked as accepted.
            if (nc.regionA != null) {
                ExcelUtil.setValue(spreadsheet, row, schema.getPosition("RegionAResolve"), nc.regionA);
                if (nc.regionA.equalsIgnoreCase(partnerAText)) {
                    ExcelUtil.setValue(spreadsheet, row, schema.getPosition("Resolve Evaluation A"), "Accept");
                }
            }
            if (nc.regionB != null) {
                ExcelUtil.setValue(spreadsheet, row, schema.getPosition("RegionBResolve"), nc.regionB);
                if (nc.regionB.equalsIgnoreCase(partnerBText)) {
                    ExcelUtil.setValue(spreadsheet, row, schema.getPosition("Resolve Evaluation B"), "Accept");
                }
            }

            // NOTE(review): an earlier revision also wrote the SLReader annotation flag (or "none" for the unseen
            // setup) into "Previous Annotation", but that value was always overwritten by the one below. The dead
            // write was dropped; if the flag was meant for a different column it needs to be reinstated there.
            String acceptString;
            if (!previousEvaluated.contains(pair)) {
                acceptString = "Not done";
            } else if (previousAccepts.contains(pair)) {
                acceptString = "Accept";
            } else {
                acceptString = "Reject";
            }
            ExcelUtil.setValue(spreadsheet, row, schema.getPosition("Previous Annotation"), acceptString);

            ExcelUtil.setFormula(spreadsheet, row, schema.getPosition("LinkToAbstract"),
                    "HYPERLINK(\"http://www.ncbi.nlm.nih.gov/pubmed/" + pmid + "\", \"PubMed\")");

            row++;
        }

    }

    /**
     * Creates the bold, single-underlined font that is passed to the XML reader when it builds the rich-text
     * sentence. The font is created from the current {@code workbook}.
     */
    private HSSFFont createSentenceFont() {
        HSSFFont font = workbook.createFont();
        font.setUnderline(HSSFFont.U_SINGLE);
        font.setBoldweight(HSSFFont.BOLDWEIGHT_BOLD);
        return font;
    }

    /**
     * Writes two evaluation sheets, one for the positive and one for the negative predictions, into the folder
     * configured under {@code whitetext.iteractions.results.folder}. The corpus and result locations are hard-coded
     * below; the commented-out sections are alternative setups.
     *
     * @param args ignored
     * @throws Exception propagated from reading the inputs or writing the sheets
     */
    public static void main(String[] args) throws Exception {
        GateInterface p2g = new GateInterface();

        // cross validation on home machine
        String corpusFilename = "/home/leon/ppi-benchmark/Corpora/Original-Modified/WhiteText before negation preds/WhiteText.before.deleting.line.xml";
        String baseFolder = "/home/leon/ppi-benchmark/Saved Results/SL/CV/WhiteText/predict/WhiteText";
        p2g.setUnSeenCorpNull();
        AirolaXMLReader XMLReader = new AirolaXMLReader(corpusFilename, p2g, "Suzanne");
        SLOutputReader SLReader = new SLOutputReader(new File(baseFolder));

        // cross validation on home machine
        // String baseFolder = Config.config.getString( "whitetext.iteractions.ppiBaseFolder" )
        // + "Saved Results/SL/CV/WhiteTextNegFixFull/predict/WhiteTextNegFixFull";
        // String filename = Config.config.getString( "whitetext.iteractions.ppiBaseFolder" )
        // + "Corpora/Original-Modified/WhiteTextNegFixFull.xml";
        // p2g.setUnSeenCorpNull();
        // AirolaXMLReader XMLReader = new AirolaXMLReader( corpusFilename, p2g, "Suzanne" );
        // SLOutputReader SLReader = new SLOutputReader( new File( baseFolder ) );

        // unSeen set
        // String baseFolder = Config.config.getString( "whitetext.iteractions.ppiBaseFolder" )
        // + "Saved Results/SL/CC/NegFixFullOnUnseen/";
        // String corpusFilename = Config.config.getString( "whitetext.iteractions.ppiBaseFolder" )
        // + "Corpora/Original-Modified/WhiteTextUnseen.orig.xml";
        // String trainingSet = "WhiteTextNegFixFull";
        // String testSet = "WhiteTextUnseen";
        // String annotationSet = "Mallet";
        // AirolaXMLReader XMLReader = new AirolaXMLReader( corpusFilename, p2g, annotationSet );
        // SLOutputReader SLReader = new SLOutputReader( trainingSet, testSet, baseFolder );

        String filename;
        filename = Config.config.getString("whitetext.iteractions.results.folder")
                + "TEMPEvalSheetTestPositivePredictions.xls";
        CreateInteractionSpreadsheet test = new CreateInteractionSpreadsheet(filename, SLReader, XMLReader);
        test.populate(SLReader.getPositivePredictions());
        test.save();

        filename = Config.config.getString("whitetext.iteractions.results.folder")
                + "TEMPEvalSheetTestNegativePredictions.xls";
        test = new CreateInteractionSpreadsheet(filename, SLReader, XMLReader);
        test.populate(SLReader.getNegativePredictions());
        test.save();
    }
}