jobs.LoadOntologyJob.java Source code

Java tutorial

Introduction

Here is the source code for jobs.LoadOntologyJob.java

Source

/*
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
 */
package jobs;

import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.List;
import models.MorphiaOntologyTerm;
import models.OntologyTerm;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.util.Version;
import play.Logger;
import play.jobs.Job;
import uk.ac.ebi.brain.core.Brain;
import utils.CustomStopWordsStandardAnalyzer;

/**
 * TO KEEP
 * Loads the NCIT ontology from {@code data/NCITNCBO.owl} and persists one term
 * per ontology class, computing at the same time the length of each label in
 * tokens (both with and without stop words).
 * According to the original author the job is fast, less than 5 minutes.
 *
 * @author loopasam
 */
public class LoadOntologyJob extends Job {

    /**
     * Loads the ontology, then for every class returned by
     * {@code getSubClasses("Thing", true)} (a "branch") saves a term for the
     * branch root and one term for each class returned by
     * {@code getSubClasses(branch, false)}.
     *
     * @throws Exception if the ontology cannot be loaded, a label cannot be
     *         resolved or tokenized, or a term cannot be saved
     */
    @Override
    public void doJob() throws Exception {

        //http://bioportal.bioontology.org/ontologies/NCIT/?p=classes&conceptid=root
        Brain brain = new Brain();
        try {
            Logger.info("Learning...");
            brain.learn("data/NCITNCBO.owl");
            Logger.info("ontology loaded...");
            //Get the first branches
            List<String> topClasses = brain.getSubClasses("Thing", true);

            int totaltop = topClasses.size();
            int countertop = 0;

            for (String topClass : topClasses) {
                countertop++;

                List<String> subclasses = brain.getSubClasses(topClass, false);
                int total = subclasses.size();
                int counter = 0;
                //Every term of a branch is tagged with the id of its top class
                String branch = topClass;
                String label = brain.getLabel(topClass);

                // NOTE(review): the branch root is stored as an OntologyTerm while its
                // subclasses below are stored as MorphiaOntologyTerm - confirm that
                // writing to two different models is intended.
                new OntologyTerm(label, topClass, branch,
                        getTotalLength(label), getLengthWithoutStopWords(label)).save();

                for (String subclass : subclasses) {
                    counter++;
                    Logger.info("branch: " + countertop + "/" + totaltop + " - i: " + counter + "/" + total);

                    String subLabel = brain.getLabel(subclass);
                    new MorphiaOntologyTerm(subLabel, subclass, branch,
                            getTotalLength(subLabel), getLengthWithoutStopWords(subLabel)).save();
                }
            }
        } finally {
            //Put the brain to sleep even when loading or saving failed half-way
            brain.sleep();
        }
        Logger.info("Job finished");
    }

    /**
     * Returns the total length of the concept label, in tokens.
     * Per the original author's note, the custom analyzer used here does not
     * remove stop words, so stop words are counted too.
     *
     * @param label the label to tokenize
     * @return the number of tokens produced by the custom analyzer
     * @throws IOException if the token stream fails
     */
    private int getTotalLength(String label) throws IOException {
        return countTokens(new CustomStopWordsStandardAnalyzer(Version.LUCENE_47), label);
    }

    /**
     * Returns the length of the concept label, in tokens, after stop words
     * have been removed by a {@link StandardAnalyzer} with its default
     * stop word set.
     *
     * @param label the label to tokenize
     * @return the number of tokens produced by the standard analyzer
     * @throws IOException if the token stream fails
     */
    private int getLengthWithoutStopWords(String label) throws IOException {
        return countTokens(new StandardAnalyzer(Version.LUCENE_47), label);
    }

    /**
     * Counts the tokens the given analyzer emits for the label, following the
     * Lucene consumer workflow (reset, incrementToken, end, close).
     * Takes ownership of the analyzer and closes it before returning.
     *
     * @param analyzer the analyzer to tokenize with; closed by this method
     * @param label the text to tokenize
     * @return the number of tokens emitted
     * @throws IOException if the token stream fails
     */
    private static int countTokens(Analyzer analyzer, String label) throws IOException {
        try {
            TokenStream stream = analyzer.tokenStream(null, new StringReader(label));
            try {
                stream.reset();
                int tokens = 0;
                while (stream.incrementToken()) {
                    tokens++;
                }
                //Signal end-of-stream before closing, as the TokenStream contract requires
                stream.end();
                return tokens;
            } finally {
                stream.close();
            }
        } finally {
            analyzer.close();
        }
    }

}