TemplateExtractor.java :  » Science » Chemistry-Development-Kit-1.3.5 » org » openscience » cdk » modeling » builder3d » Java Open Source

Java Open Source » Science » Chemistry Development Kit 1.3.5 
Chemistry Development Kit 1.3.5 » org » openscience » cdk » modeling » builder3d » TemplateExtractor.java
/* $Revision: 7885 $ $Author: egonw $ $Date: 2007-02-07 21:19:27 +0100 (Wed, 07 Feb 2007) $
 * 
 * Copyright (C) 2004-2007  Christian Hoppe <c.hoppe_@web.de>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation; either version 2.1
 * of the License, or (at your option) any later version.
 * All we ask is that proper credit is given for our work, which includes
 * - but is not limited to - adding the above copyright notice to the beginning
 * of your source code files, and to any copyright notice that you may distribute
 * with programs based on this work.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 */
package org.openscience.cdk.modeling.builder3d;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.FileWriter;
import java.util.ArrayList;
import java.util.BitSet;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

import org.openscience.cdk.CDKConstants;
import org.openscience.cdk.fingerprint.Fingerprinter;
import org.openscience.cdk.interfaces.IAtomContainer;
import org.openscience.cdk.interfaces.IBond;
import org.openscience.cdk.interfaces.IMolecule;
import org.openscience.cdk.interfaces.IMoleculeSet;
import org.openscience.cdk.interfaces.IChemObjectBuilder;
import org.openscience.cdk.interfaces.IRingSet;
import org.openscience.cdk.io.MDLWriter;
import org.openscience.cdk.io.iterator.IteratingMDLReader;
import org.openscience.cdk.isomorphism.matchers.QueryAtomContainerCreator;
import org.openscience.cdk.nonotify.NoNotificationChemObjectBuilder;
import org.openscience.cdk.ringsearch.AllRingsFinder;
import org.openscience.cdk.ringsearch.RingPartitioner;
import org.openscience.cdk.ringsearch.SSSRFinder;
import org.openscience.cdk.smiles.SmilesGenerator;
import org.openscience.cdk.tools.manipulator.AtomContainerManipulator;
import org.openscience.cdk.tools.manipulator.RingSetManipulator;

/**
 * Helper class that helps set up a template library for CDK's Builder3D.
 * 
 * @author     Christian Hoppe.
 * @cdk.module builder3dtools
 * @cdk.githash
 */
public class TemplateExtractor {

  /** Command-line synopsis printed by {@link #main(String[])}. */
  static final String usage = "Usage: TemplateExtractor SDFinfile outfile anyAtom=true/false anyBondAnyAtom=true/false";

  /** Builder through which the methods of this class create CDK objects. */
  private static final IChemObjectBuilder builder = NoNotificationChemObjectBuilder.getInstance();

  /**
   * Creates an extractor. The class declares no instance fields, so the
   * new object carries no state of its own.
   */
  public TemplateExtractor() {
  }

  /**
   * Filters an SD file down to the entries that carry 3D coordinates.
   *
   * <p>Every molecule with more than two atoms whose first atom has a
   * non-null 3D point is kept. The kept molecules are handed to
   * {@link #writeChemModel(IMoleculeSet, String, String)} with the suffix
   * <code>"_CLEAN"</code>. Progress and problems are reported on standard
   * output. If the input file cannot be opened the problem is reported and
   * the method returns without writing anything.
   *
   * @param dataFile path of the SD file to read; also the prefix of the
   *                 output file name
   */
  public void cleanDataSet(String dataFile) {
    IMoleculeSet som = builder.newInstance(IMoleculeSet.class);
    IteratingMDLReader imdl = null;
    try {
      System.out.println("Start clean dataset...");
      BufferedReader fin = new BufferedReader(new FileReader(dataFile));
      imdl = new IteratingMDLReader(fin, NoNotificationChemObjectBuilder
          .getInstance());
      System.out.print("Read File in..");
    } catch (Exception exc) {
      System.out.println("Could not read Molecules from file " + dataFile
          + " due to: " + exc.getMessage());
      // Without a reader there is nothing to iterate over.
      return;
    }
    System.out.println("READY");
    try {
      int c = 0;
      while (imdl.hasNext()) {
        c++;
        // Progress marker every 1000 records.
        if (c % 1000 == 0) {
          System.out.println("...");
        }
        IMolecule m = (IMolecule) imdl.next();
        if (m.getAtomCount() > 2 && m.getAtom(0).getPoint3d() != null) {
          som.addMolecule(m);
        }
      }
    } finally {
      // Release the reader even when a record cannot be processed.
      try {
        imdl.close();
      } catch (Exception exc1) {
        System.out.println("Could not close Reader due to: "
            + exc1.getMessage());
      }
    }
    System.out.println(som.getMoleculeCount() + " Templates are read in");
    writeChemModel(som, dataFile, "_CLEAN");
  }

  /**
   * Reads every molecule of an SD file into a local molecule set and prints
   * how many were read.
   *
   * <p>The set is neither returned nor stored, so the only observable
   * result is the console output. If the file cannot be opened the problem
   * is reported on standard output and the method returns.
   *
   * @param dataFile path of the SD file to read
   */
  public void ReadNCISdfFileAsTemplate(String dataFile) {
    IMoleculeSet som = builder.newInstance(IMoleculeSet.class);
    IteratingMDLReader imdl = null;
    try {
      System.out.println("Start...");
      BufferedReader fin = new BufferedReader(new FileReader(dataFile));
      imdl = new IteratingMDLReader(fin, NoNotificationChemObjectBuilder
          .getInstance());
      System.out.print("Read File in..");
    } catch (Exception exc) {
      System.out.println("Could not read Molecules from file " + dataFile
          + " due to: " + exc.getMessage());
      // Without a reader there is nothing to iterate over.
      return;
    }
    System.out.println("READY");
    try {
      while (imdl.hasNext()) {
        som.addMolecule((IMolecule) imdl.next());
      }
    } finally {
      // Release the reader even when a record cannot be processed.
      try {
        imdl.close();
      } catch (Exception exc1) {
        System.out.println("Could not close Reader due to: "
            + exc1.getMessage());
      }
    }
    System.out.println(som.getMoleculeCount() + " Templates are read in");
  }

  /**
   * Splits every molecule of an SD file into the rings returned by
   * {@code SSSRFinder.findSSSR()} and writes those rings out as separate
   * molecules.
   *
   * <p>The atom count of each input molecule is printed. The collected
   * rings are handed to
   * {@link #writeChemModel(IMoleculeSet, String, String)} with the suffix
   * <code>"_VERSUCH"</code>. If the input file cannot be opened the problem
   * is reported on standard output and the method returns without writing
   * anything.
   *
   * @param dataFile path of the SD file to read; also the prefix of the
   *                 output file name
   */
  public void PartitionRingsFromComplexRing(String dataFile) {
    IMoleculeSet som = builder.newInstance(IMoleculeSet.class);
    IteratingMDLReader imdl = null;
    try {
      System.out.println("Start...");
      BufferedReader fin = new BufferedReader(new FileReader(dataFile));
      imdl = new IteratingMDLReader(fin, builder);
      System.out.print("Read File in..");
    } catch (Exception exc) {
      System.out.println("Could not read Molecules from file " + dataFile
          + " due to: " + exc.getMessage());
      // Without a reader there is nothing to iterate over.
      return;
    }
    System.out.println("READY");
    try {
      while (imdl.hasNext()) {
        IMolecule m = (IMolecule) imdl.next();
        System.out.println("Atoms:" + m.getAtomCount());
        IRingSet ringSetM = new SSSRFinder(m).findSSSR();
        // Each ring becomes its own molecule in the output set.
        for (int i = 0; i < ringSetM.getAtomContainerCount(); i++) {
          som.addMolecule(builder.newInstance(IMolecule.class, ringSetM.getAtomContainer(i)));
        }
      }
    } finally {
      // Release the reader even when a record cannot be processed.
      try {
        imdl.close();
      } catch (Exception exc1) {
        System.out.println("Could not close Reader due to: "
            + exc1.getMessage());
      }
    }
    System.out.println(som.getMoleculeCount() + " Templates are read in");
    writeChemModel(som, dataFile, "_VERSUCH");
  }

  /**
   * Extracts the distinct ring systems found in an SD file and writes one
   * representative of each to <code>dataFile + "_UniqueRings"</code>.
   *
   * <p>For every molecule the ring set from {@code SSSRFinder.findSSSR()}
   * is partitioned with {@code RingPartitioner.partitionRings}. Each
   * resulting ring system is merged into a single atom container, every
   * atom symbol is set to <code>"C"</code> so that only connectivity
   * distinguishes ring systems, and the SMILES of that container is used as
   * the uniqueness key. A ring system is written the first time its key is
   * seen.
   *
   * <p>Progress, a final summary and any problems are reported on standard
   * output. If the input file cannot be read or the output file cannot be
   * created, the method reports the problem and returns.
   *
   * @param dataFile path of the SD file to read; also the prefix of the
   *                 output file name
   */
  public void extractUniqueRingSystemsFromFile(String dataFile) {
    System.out.println("****** EXTRACT UNIQUE RING SYSTEMS ******");
    System.out.println("From file:" + dataFile);

    // Keys are the SMILES of ring systems already written; the value is a
    // placeholder and is never read.
    Map<String, String> hashRingSystems = new HashMap<String, String>();
    SmilesGenerator smilesGenerator = new SmilesGenerator();

    int counterRings = 0;
    int counterMolecules = 0;
    int counterUniqueRings = 0;

    String molfile = dataFile + "_UniqueRings";

    IteratingMDLReader imdl = null;
    MDLWriter mdlw = null;
    try {
      // Open the input first so that an unreadable input does not leave an
      // empty output file behind.
      try {
        System.out.println("Start...");
        BufferedReader fin = new BufferedReader(new FileReader(dataFile));
        imdl = new IteratingMDLReader(fin, builder);
        System.out.println("Read File in..");
      } catch (Exception exc) {
        System.out.println("Could not read Molecules from file " + dataFile
            + " due to: " + exc.getMessage());
        return;
      }

      try {
        mdlw = new MDLWriter(new FileOutputStream(molfile));
      } catch (Exception ex2) {
        System.out.println("IOError:cannot write file due to:"
            + ex2.toString());
        return;
      }

      while (imdl.hasNext()) {
        IMolecule m = (IMolecule) imdl.next();
        counterMolecules = counterMolecules + 1;
        IRingSet ringSetM = new SSSRFinder(m).findSSSR();

        if (counterMolecules % 1000 == 0) {
          System.out.println("Molecules:" + counterMolecules);
        }

        if (ringSetM.getAtomContainerCount() > 0) {
          List ringSystems = RingPartitioner.partitionRings(ringSetM);

          for (int i = 0; i < ringSystems.size(); i++) {
            IRingSet ringSet = (IRingSet) ringSystems.get(i);

            // Merge all rings of this ring system into one container.
            IAtomContainer ac = builder.newInstance(IAtomContainer.class);
            Iterator containers = RingSetManipulator.getAllAtomContainers(ringSet).iterator();
            while (containers.hasNext()) {
              ac.add((IAtomContainer) containers.next());
            }
            counterRings = counterRings + 1;

            // Only connection is important
            for (int j = 0; j < ac.getAtomCount(); j++) {
              ac.getAtom(j).setSymbol("C");
            }

            String key = smilesGenerator.createSMILES(builder.newInstance(IMolecule.class, ac));
            if (!hashRingSystems.containsKey(key)) {
              counterUniqueRings = counterUniqueRings + 1;
              hashRingSystems.put(key, "1");
              try {
                mdlw.write(builder.newInstance(IMolecule.class, ac));
              } catch (Exception emdl) {
                // Keep going with the remaining ring systems, but make the
                // failure visible.
                System.out.println("Could not write ring system " + key
                    + " due to: " + emdl.getMessage());
              }
            }
          }
        }
      }
    } finally {
      // Close both resources independently so that a failure to close the
      // reader cannot prevent the writer from being closed.
      if (imdl != null) {
        try {
          imdl.close();
        } catch (Exception exc1) {
          System.out.println("Could not close iterator mdl reader due to: "
              + exc1.getMessage());
        }
      }
      if (mdlw != null) {
        try {
          mdlw.close();
        } catch (Exception exc1) {
          System.out.println("Could not close MDL writer due to: "
              + exc1.getMessage());
        }
      }
    }

    System.out.println("READY Molecules:" + counterMolecules
        + " RingSystems:" + counterRings + " UniqueRingsSystem:"
        + counterUniqueRings);
    System.out.println("HashtableKeys:" + hashRingSystems.size());
  }

  /**
   * Writes a molecule set to the file <code>file + endFix</code> using an
   * {@code MDLWriter}.
   *
   * <p>The number of molecules is printed first. Failures are reported on
   * standard output rather than thrown, and the output stream is closed
   * whether or not the write succeeds.
   *
   * @param som    molecules to write
   * @param file   prefix of the output file name
   * @param endFix suffix appended to <code>file</code> to form the output
   *               file name
   */
  public void writeChemModel(IMoleculeSet som, String file, String endFix) {
    System.out.println("WRITE Molecules:" + som.getMoleculeCount());
    String molfile = file + endFix;
    FileOutputStream fout = null;
    MDLWriter mdlw = null;
    try {
      fout = new FileOutputStream(molfile);
      mdlw = new MDLWriter(fout);
      mdlw.write(som);
    } catch (Exception ex2) {
      System.out.println("IOError:cannot write file due to:"
          + ex2.toString());
    } finally {
      try {
        if (mdlw != null) {
          mdlw.close();
        } else if (fout != null) {
          // The writer was never created; close the raw stream instead.
          fout.close();
        }
      } catch (Exception ex3) {
        System.out.println("IOError:cannot write file due to:"
            + ex3.toString());
      }
    }
  }

  /**
   * Converts every molecule of an SD file to the SMILES string produced by
   * {@code SmilesGenerator.createSMILES} and writes the strings, one per
   * line, to a text file.
   *
   * <p>A molecule whose SMILES cannot be generated is reported on standard
   * output and skipped. If the input cannot be opened the method reports
   * the problem and returns. If the output cannot be written the problem is
   * reported and the remaining strings are not written.
   *
   * @param dataFileIn  path of the SD file to read
   * @param dataFileOut path of the text file to write
   */
  public void makeCanonicalSmileFromRingSystems(String dataFileIn,
      String dataFileOut) {
    System.out.println("Start make SMILES...");
    List<String> data = new ArrayList<String>();
    SmilesGenerator smiles = new SmilesGenerator();

    IteratingMDLReader imdl = null;
    try {
      System.out.println("Start...");
      BufferedReader fin = new BufferedReader(new FileReader(dataFileIn));
      imdl = new IteratingMDLReader(fin, NoNotificationChemObjectBuilder
          .getInstance());
      System.out.println("Read File in..");
    } catch (Exception exc) {
      System.out.println("Could not read Molecules from file "
          + dataFileIn + " due to: " + exc.getMessage());
      // Without a reader there is nothing to iterate over.
      return;
    }
    try {
      while (imdl.hasNext()) {
        IMolecule m = (IMolecule) imdl.next();
        try {
          data.add(smiles.createSMILES(builder.newInstance(IMolecule.class, m)));
        } catch (Exception exc1) {
          System.out.println("Could not create smile due to: "
              + exc1.getMessage());
        }
      }
    } finally {
      // Release the reader even when a record cannot be processed.
      try {
        imdl.close();
      } catch (Exception exc2) {
        System.out.println("Could not close Reader due to: "
            + exc2.getMessage());
      }
    }

    System.out.print("...ready\nWrite data...");
    BufferedWriter fout = null;
    try {
      fout = new BufferedWriter(new FileWriter(dataFileOut));
      for (String smile : data) {
        fout.write(smile);
        fout.newLine();
      }
      System.out.println("number of smiles:" + data.size());
      System.out.println("...ready");
    } catch (Exception exc3) {
      System.out.println("Could not write smile in file " + dataFileOut
          + " due to: " + exc3.getMessage());
    } finally {
      if (fout != null) {
        try {
          // close() also flushes buffered lines, so a failure here means
          // the file may be incomplete.
          fout.close();
        } catch (Exception exc5) {
          System.out.println("Could not write smile in file " + dataFileOut
              + " due to: " + exc5.getMessage());
        }
      }
    }
  }

  /**
   * Reads molecules from an SD stream and computes one fingerprint per
   * molecule.
   *
   * <p>Before fingerprinting, each molecule is converted to a query
   * structure. When <code>anyAtom</code> is true and
   * <code>anyAtomAnyBond</code> is false this is
   * {@code QueryAtomContainerCreator.createAnyAtomContainer(m, false)}; in
   * every other case, including both flags being false, it is
   * {@code AtomContainerManipulator.createAllCarbonAllSingleNonAromaticBondAtomContainer(m)}.
   * NOTE(review): with both flags false one might expect the unmodified
   * molecule to be fingerprinted - confirm that the current behaviour is
   * intended.
   *
   * <p>If fingerprinting a molecule fails, a fingerprint with all bits set
   * is stored in its place, so the returned list always holds exactly one
   * entry per molecule read. Progress is printed to standard output. The
   * reader is closed before the method returns.
   *
   * @param anyAtom        see above
   * @param anyAtomAnyBond see above
   * @param timings        histogram updated for every successfully
   *                       fingerprinted molecule: the key is the elapsed
   *                       time in milliseconds divided by 10, as a decimal
   *                       string, and the value is an {@code Integer}
   *                       count; may be <code>null</code> to skip timing
   * @param fin            source of the SD data
   * @param limit          maximum number of molecules to read, or -1 for
   *                       no limit
   * @return list of {@code BitSet} fingerprints in input order; empty if
   *         the reader could not be created
   * @throws Exception if creating the query structure or reading a record
   *                   fails
   */
  public List makeFingerprintsFromSdf(boolean anyAtom, boolean anyAtomAnyBond, Map timings, BufferedReader fin, int limit) throws Exception{
    Fingerprinter fingerPrinter = new Fingerprinter(Fingerprinter.DEFAULT_SIZE, Fingerprinter.DEFAULT_SEARCH_DEPTH);
    List<BitSet> data = new ArrayList<BitSet>();
    IteratingMDLReader imdl = null;
    try {
      System.out.print("Read data file in ...");
      imdl = new IteratingMDLReader(fin, NoNotificationChemObjectBuilder
          .getInstance());
      System.out.println("ready");
    } catch (Exception exc) {
      System.out.println("Could not read Molecules from file"+
          " due to: " + exc.getMessage());
      // Without a reader there is nothing to fingerprint.
      return data;
    }
    int moleculeCounter = 0;
    int fingerprintCounter = 0;
    System.out.print("Generated Fingerprints: " + fingerprintCounter + "    ");
    try {
      while (imdl.hasNext() && (moleculeCounter<limit || limit==-1)) {
        IMolecule m = (IMolecule) imdl.next();
        moleculeCounter++;

        IAtomContainer query;
        if (anyAtom && !anyAtomAnyBond) {
          query = QueryAtomContainerCreator.createAnyAtomContainer(m, false);
        } else {
          query = AtomContainerManipulator.createAllCarbonAllSingleNonAromaticBondAtomContainer(m);
        }

        boolean fingerprinted = false;
        long time = -System.currentTimeMillis();
        try {
          data.add(fingerPrinter.getFingerprint(query));
          fingerprinted = true;
        } catch (Exception exc1) {
          System.out.println("QueryFingerprintError: from molecule:"
              + moleculeCounter + " due to:" + exc1.getMessage());

          // OK, just adds a fingerprint with all ones, so that any
          // structure will match this template, and leave it up
          // to substructure match to figure things out
          BitSet allOnesFingerprint = new BitSet(fingerPrinter.getSize());
          allOnesFingerprint.set(0, fingerPrinter.getSize());
          data.add(allOnesFingerprint);
        }
        time += System.currentTimeMillis();
        fingerprintCounter = fingerprintCounter + 1;

        // The histogram is updated outside the try block so that a problem
        // with the timings map can never be mistaken for a fingerprinting
        // failure and add a second entry for the same molecule.
        if (fingerprinted && timings != null) {
          String bin = Integer.toString((int) (time / 10));
          Integer previous = (Integer) timings.get(bin);
          int count = (previous == null) ? 1 : previous.intValue() + 1;
          timings.put(bin, Integer.valueOf(count));
        }

        // Two-state console spinner.
        if (fingerprintCounter % 2 == 0) {
          System.out.print("\b" + "/");
        } else {
          System.out.print("\b" + "\\");
        }

        if (fingerprintCounter % 100 == 0) {
          System.out.print("\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b" + "Generated Fingerprints: " + fingerprintCounter + "   \n");
        }
      }
    } finally {
      // Release the reader even when a record cannot be processed.
      try {
        imdl.close();
      } catch (Exception exc2) {
        exc2.printStackTrace();
      }
    }
    System.out.print("...ready with:" + moleculeCounter
        + " molecules\nWrite data...of data vector:" + data.size()
        + " fingerprintCounter:" + fingerprintCounter);

    return data;
  }
  
  /**
   * Computes fingerprints for all molecules of an SD file and writes their
   * {@code BitSet.toString()} form, one per line, to a text file.
   *
   * <p>The fingerprints come from
   * {@link #makeFingerprintsFromSdf(boolean, boolean, Map, BufferedReader, int)}
   * called with a limit of -1. The timing histogram filled by that call is
   * printed at the end. Problems writing the output are reported on
   * standard output rather than thrown.
   *
   * @param dataFileIn     path of the SD file to read
   * @param dataFileOut    path of the text file to write
   * @param anyAtom        passed through to the fingerprint generation
   * @param anyAtomAnyBond passed through to the fingerprint generation
   * @throws Exception if the input file cannot be opened or fingerprint
   *                   generation fails
   */
  public void makeFingerprintFromRingSystems(String dataFileIn,
      String dataFileOut, boolean anyAtom, boolean anyAtomAnyBond)
  throws Exception {
    Map timings = new HashMap();

    System.out.println("Start make fingerprint from file:" + dataFileIn
        + " ...");
    BufferedReader fin = new BufferedReader(new FileReader(dataFileIn));
    List data;
    try {
      data = makeFingerprintsFromSdf(anyAtom, anyAtomAnyBond, timings, fin, -1);
    } finally {
      try {
        // The callee may already have closed the reader; closing a
        // BufferedReader a second time has no effect.
        fin.close();
      } catch (Exception ignored) {
        // Nothing useful can be done about a failed close of the input.
      }
    }

    BufferedWriter fout = null;
    try {
      fout = new BufferedWriter(new FileWriter(dataFileOut));
      for (int i = 0; i < data.size(); i++) {
        fout.write(((BitSet) data.get(i)).toString());
        fout.newLine();
      }
      System.out.println("\nFingerprints:" + data.size()
          + " are written...ready");
    } catch (Exception exc3) {
      System.out.println("Could not write Fingerprint in file "
          + dataFileOut + " due to: " + exc3.getMessage());
    } finally {
      if (fout != null) {
        try {
          // close() also flushes buffered lines, so a failure here means
          // the file may be incomplete.
          fout.close();
        } catch (Exception exc5) {
          System.out.println("Could not write Fingerprint in file "
              + dataFileOut + " due to: " + exc5.getMessage());
        }
      }
    }
    System.out.println("\nComputing time statistics:\n" + timings.toString());
  }

  /**
   * Removes every bond whose first and second atom are the same object
   * (a "loop" bond) from the given molecule.
   *
   * <p>A message containing <code>position</code> is printed to standard
   * output for each bond removed.
   *
   * @param molecule molecule to modify in place
   * @param position number used only in the printed message to identify
   *                 the molecule
   * @return the same <code>molecule</code> instance that was passed in
   */
  public IMolecule removeLoopBonds(IMolecule molecule, int position) {
    // Walk from the last bond to the first: removing a bond can then never
    // change the index of a bond that has not been inspected yet, so
    // adjacent loop bonds are not skipped.
    for (int i = molecule.getBondCount() - 1; i >= 0; i--) {
      IBond bond = molecule.getBond(i);
      if (bond.getAtom(0) == bond.getAtom(1)) {
        System.out.println("Loop found! Molecule:" + position);
        molecule.removeBond(bond);
      }
    }

    return molecule;
  }

  /**
   * Returns a clone of the given container in which the symbol of every
   * atom has been set to <code>"C"</code>. The argument itself is only
   * cloned, not modified through this method's own calls.
   *
   * @param atomContainer container to copy
   * @return the modified clone
   * @throws Exception if cloning the container fails
   */
  public IAtomContainer createAnyAtomAtomContainer(IAtomContainer atomContainer)
  throws Exception {
    final IAtomContainer copy = (IAtomContainer) atomContainer.clone();
    int index = 0;
    while (index < copy.getAtomCount()) {
      copy.getAtom(index).setSymbol("C");
      index++;
    }
    return copy;
  }
  
  /**
   * Clears the {@code CDKConstants.VISITED} flag on every atom and every
   * electron container of the given atom container.
   *
   * @param ac container whose flags are cleared in place
   * @return the same <code>ac</code> instance that was passed in
   */
  public IAtomContainer resetFlags(IAtomContainer ac) {
    int atomIndex = 0;
    while (atomIndex < ac.getAtomCount()) {
      ac.getAtom(atomIndex).setFlag(CDKConstants.VISITED, false);
      atomIndex++;
    }

    int electronContainerIndex = 0;
    while (electronContainerIndex < ac.getElectronContainerCount()) {
      ac.getElectronContainer(electronContainerIndex).setFlag(CDKConstants.VISITED, false);
      electronContainerIndex++;
    }
    return ac;
  }

  /**
   * Command-line entry point: runs
   * {@link #makeFingerprintFromRingSystems(String, String, boolean, boolean)}
   * on the given files.
   *
   * <p>Expects four arguments: input SD file, output file, and two boolean
   * flags. The flags are parsed with {@link Boolean#parseBoolean(String)},
   * so only the text <code>true</code> (ignoring case) enables a flag; a
   * value such as <code>anyAtom=true</code> is read as false. With fewer
   * than four arguments the usage text is printed and nothing else is done.
   *
   * @param args command-line arguments as described above
   */
  public static void main(String[] args) {
    if (args.length < 4) {
      System.out.println(usage);
      // Nothing can be done without all four arguments.
      return;
    }
    try {
      new TemplateExtractor().makeFingerprintFromRingSystems(args[0],
          args[1], Boolean.parseBoolean(args[2]), Boolean.parseBoolean(args[3]));
    } catch (Exception e) {
      System.out.println(usage);
      e.printStackTrace();
    }
  }

}
java2s.com  | Contact Us | Privacy Policy
Copyright 2009 - 12 Demo Source and Support. All rights reserved.
All other trademarks are property of their respective owners.