act.shared.helpers.XMLToImportantChemicals.java Source code

Java tutorial

Introduction

Here is the source code for act.shared.helpers.XMLToImportantChemicals.java

Source

/*************************************************************************
*                                                                        *
*  This file is part of the 20n/act project.                             *
*  20n/act enables DNA prediction for synthetic biology/bioengineering.  *
*  Copyright (C) 2017 20n Labs, Inc.                                     *
*                                                                        *
*  Please direct all queries to act@20n.com.                             *
*                                                                        *
*  This program is free software: you can redistribute it and/or modify  *
*  it under the terms of the GNU General Public License as published by  *
*  the Free Software Foundation, either version 3 of the License, or     *
*  (at your option) any later version.                                   *
*                                                                        *
*  This program is distributed in the hope that it will be useful,       *
*  but WITHOUT ANY WARRANTY; without even the implied warranty of        *
*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
*  GNU General Public License for more details.                          *
*                                                                        *
*  You should have received a copy of the GNU General Public License     *
*  along with this program.  If not, see <http://www.gnu.org/licenses/>. *
*                                                                        *
*************************************************************************/

package act.shared.helpers;

import java.io.FileInputStream;
import java.util.Stack;

import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamReader;
import javax.xml.stream.events.XMLEvent;

import com.mongodb.BasicDBList;
import com.mongodb.BasicDBObject;
import com.mongodb.DBObject;
import com.mongodb.util.JSON;

/**
 * Streams an XML file with StAX and prints every direct child of the root
 * element as one tab-separated line.
 *
 * <p>Each child of the root is converted into a {@link DBObject}: nested
 * elements become fields keyed by their local name (repeated names are
 * collected into a {@link BasicDBList}), and an element's own text is stored
 * under the internal key {@link #strTag}. Elements that contain nothing but
 * text are stored in their parent as a plain {@code String}. XML attributes
 * are not read.
 *
 * <p>Children whose name equals {@code rowTag} are written to stdout by
 * {@link #printEntry(DBObject)}; every other child of the root is written to
 * stderr by {@link #printUnwantedEntry(DBObject)}.
 */
public class XMLToImportantChemicals {
    /** Key under which an element's own character data is accumulated. */
    private static final String strTag = "____txt_";
    private final String xmlfile;
    private final String rowTag;
    private final String idTag;
    private final String chemTag;
    private final String DBS;

    /**
     * @param f       path of the XML file to read
     * @param DBS     label printed as the first column of every entry line
     * @param rowTag  local name of the root's children that are printed as entries
     * @param idTag   field of an entry whose string value is printed as its id
     * @param chemTag field of an entry whose string value is printed in the third
     *                column when the entry has no InChI property
     */
    public XMLToImportantChemicals(String f, String DBS, String rowTag, String idTag, String chemTag) {
        this.xmlfile = f;
        this.rowTag = rowTag;
        this.idTag = idTag;
        this.chemTag = chemTag;
        this.DBS = DBS;
    }

    /**
     * Opens the configured XML file and processes it. On any failure the stack
     * trace and an error line are printed and the JVM exits with status -1.
     */
    public void process() {
        try {
            FileInputStream fileInputStream = new FileInputStream(this.xmlfile);
            try {
                XMLStreamReader xml = XMLInputFactory.newInstance().createXMLStreamReader(fileInputStream);
                try {
                    process(xml);
                } finally {
                    // Releases parser resources only; the underlying stream is
                    // closed separately below.
                    xml.close();
                }
            } finally {
                fileInputStream.close();
            }
        } catch (Exception ex) {
            ex.printStackTrace();
            System.out.println("XML Reading error:" + this.xmlfile);
            System.exit(-1);
        }
    }

    /**
     * Walks the event stream, building one {@link DBObject} per open element
     * below the root and emitting each completed direct child of the root.
     *
     * @param xml reader positioned before the document's first event
     * @throws XMLStreamException if the underlying parser reports an error
     */
    private void process(XMLStreamReader xml) throws XMLStreamException {
        String tag;
        // The root element gets no object of its own, so an empty stack while
        // rootSeen is true means "directly inside the root".
        boolean rootSeen = false;
        Stack<DBObject> json = new Stack<DBObject>();
        DBObject js;
        while (xml.hasNext()) {
            int eventType = xml.next();
            // Skip whitespace-only events; returning to the loop head keeps every
            // next() call guarded by hasNext().
            if (eventType == XMLEvent.SPACE || xml.isWhiteSpace())
                continue;

            switch (eventType) {
            case XMLEvent.START_ELEMENT:
                if (!rootSeen) {
                    rootSeen = true;
                } else {
                    json.push(new BasicDBObject());
                }
                break;
            case XMLEvent.END_ELEMENT:
                if (json.isEmpty()) {
                    // Closing the root element; the loop ends with the document.
                    // Depth (not the tag name) decides this, so a nested element
                    // that shares the root's name is still popped correctly.
                    break;
                }
                tag = xml.getLocalName();
                js = json.pop();
                if (json.isEmpty()) {
                    if (tag.equals(rowTag))
                        printEntry(js);
                    else
                        printUnwantedEntry(js);
                } else {
                    putListStrOrJSON(json.peek(), tag, js);
                }
                break;

            case XMLEvent.CHARACTERS:
            case XMLEvent.CDATA:
                // CDATA sections are element text just like CHARACTERS.
                if (json.isEmpty()) {
                    // Text directly inside the root belongs to no entry; skip it
                    // instead of failing on an empty stack.
                    break;
                }
                appendText(json.peek(), xml.getText());
                break;

            case XMLEvent.START_DOCUMENT:
                break;
            case XMLEvent.END_DOCUMENT:
                break;
            case XMLEvent.COMMENT:
            case XMLEvent.ENTITY_REFERENCE:
            case XMLEvent.ATTRIBUTE:
            case XMLEvent.PROCESSING_INSTRUCTION:
            case XMLEvent.DTD:
                // NOTE(review): this diagnostic (the numeric event code) goes to
                // stdout, the same stream as the entry lines - confirm intended.
                System.out.format("%s --\n", eventType);
                break;
            }
        }
    }

    /**
     * Appends {@code txt} to the text already accumulated on {@code js}, if any.
     * The key is removed and re-inserted, as the original code did.
     */
    private static void appendText(DBObject js, String txt) {
        if (js.containsField(strTag)) {
            txt = js.get(strTag) + txt;
            js.removeField(strTag);
        }
        js.put(strTag, txt);
    }

    /**
     * Stores a completed child under {@code tag} in its parent. A child that
     * holds nothing but text is stored as the bare text value; anything else is
     * stored as the object itself.
     */
    private void putListStrOrJSON(DBObject json, String tag, DBObject toAdd) {
        // if it is a string add it unencapsulated
        if (toAdd.keySet().size() == 1 && toAdd.containsField(strTag))
            putElemOrList(json, tag, toAdd.get(strTag));
        else
            putElemOrList(json, tag, toAdd);
    }

    /**
     * Puts {@code add} under {@code tag}. If the tag is already present the
     * existing value is turned into (or extended as) a {@link BasicDBList}.
     */
    private void putElemOrList(DBObject json, String tag, Object add) {
        // if tag already present then make it a list
        if (json.containsField(tag)) {
            BasicDBList l;
            Object already = json.get(tag);
            if (already instanceof BasicDBList) {
                l = (BasicDBList) already;
            } else {
                l = new BasicDBList();
                l.add(already);
            }
            l.add(add);
            json.removeField(tag);
            json.put(tag, l);
            return;
        }

        // else, just add as is
        json.put(tag, add);
    }

    /**
     * Prints one entry to stdout as {@code DBS \t id \t chemical \t json}, where
     * the chemical column is the entry's InChI when one is found and the
     * {@code chemTag} value otherwise. Non-string id/chemTag values print as "".
     */
    private void printEntry(DBObject json) {
        Object ido = json.get(idTag);
        Object chemo = json.get(chemTag);
        String id = ido instanceof String ? (String) ido : "";
        String chem = chemo instanceof String ? (String) chemo : "";
        String inchi = getInchi(json);
        if (inchi != null)
            System.out.format("%s\t%s\t%s\t%s\n", this.DBS, id, inchi, JSON.serialize(json));
        else
            System.out.format("%s\t%s\t%s\t%s\n", this.DBS, id, chem, JSON.serialize(json));
    }

    /**
     * Looks up the InChI under
     * {@code calculated-properties.property[{kind=InChI, value=<inchi>}]}.
     * {@code property} may be a list of objects or, when the XML had a single
     * {@code property} element, one object. Values of any unexpected type are
     * ignored rather than cast.
     *
     * @return the first matching InChI string, or {@code null} if there is none
     */
    private String getInchi(DBObject json) {
        if (!json.containsField("calculated-properties"))
            return null;
        Object calculated = json.get("calculated-properties");
        if (!(calculated instanceof DBObject))
            return null;
        DBObject props = (DBObject) calculated;
        // containsField is checked before get because list-backed objects only
        // accept numeric keys in get().
        if (!props.containsField("property"))
            return null;
        Object property = props.get("property");
        if (property instanceof BasicDBList) {
            for (Object kv : (BasicDBList) property) {
                String inchi = inchiValueOf(kv);
                if (inchi != null)
                    return inchi;
            }
            return null;
        }
        return inchiValueOf(property);
    }

    /**
     * Returns the {@code value} of {@code candidate} when it is an object with
     * {@code kind} equal to "InChI" and a string {@code value}; otherwise null.
     */
    private static String inchiValueOf(Object candidate) {
        if (!(candidate instanceof DBObject) || candidate instanceof BasicDBList)
            return null;
        DBObject kv = (DBObject) candidate;
        if (kv.containsField("kind") && "InChI".equals(kv.get("kind")) && kv.containsField("value")) {
            Object value = kv.get("value");
            if (value instanceof String)
                return (String) value;
        }
        return null;
    }

    /** Writes a child of the root that is not a {@code rowTag} element to stderr. */
    private void printUnwantedEntry(DBObject json) {
        System.err.format("%s\t%s\t%s\t%s\n", "UNKNOWN", "ID?", "CAS?", JSON.serialize(json));
    }

}