// VisualSearchIndexBuilder.java :  » Search » spectacles » com » stromberglabs » visual » builder » Java Open Source
//
// Java Open Source » Search » spectacles
// spectacles » com » stromberglabs » visual » builder » VisualSearchIndexBuilder.java
package com.stromberglabs.visual.builder;

import java.io.File;
import java.io.IOException;
import java.sql.ResultSet;
import java.sql.SQLException;

import org.apache.commons.configuration.ConfigurationException;
import org.apache.log4j.Logger;

import com.stromberglabs.cluster.KClusterer;
import com.stromberglabs.cluster.KMeansClusterer;
import com.stromberglabs.cluster.KMeansForestClusterer;
import com.stromberglabs.db.DBConnectionManager;
import com.stromberglabs.index.WordInvertedIndex;
import com.stromberglabs.tree.KMeansTree;
import com.stromberglabs.tree.query.QueryTree;
import com.stromberglabs.util.Config;
import com.stromberglabs.util.file.SerializationUtils;
import com.stromberglabs.visual.ip.cache.DirectDBInterestPointCache;
import com.stromberglabs.visual.ip.cache.InterestPointCache;
import com.stromberglabs.visual.ip.dao.InterestPointDAO;
import com.stromberglabs.visual.ip.dao.SIFTInterestPointDAO;
import com.stromberglabs.visual.ip.dao.SURFInterestPointDAO;
import com.stromberglabs.visual.ip.dao.ThreadedInterestPointGenerator;
import com.stromberglabs.visual.search.L1ImageWordIndex;
import com.stromberglabs.visual.tree.VocabTreeManager;

/**
 * Encapsulates all of the steps required to build a searchable (compressed)
 * index from a folder of images.
 * <p>
 * A run generates interest points for the images in the input folder, builds
 * (or reloads) a vocabulary tree, builds an {@link L1ImageWordIndex} from it,
 * and finally hands the tree and index to {@link QueryTree} and
 * {@link WordInvertedIndex} together with the index folder.
 *
 * @author Andrew
 *
 */
public class VisualSearchIndexBuilder {
  private static final Logger logger = Logger.getLogger(VisualSearchIndexBuilder.class);

  /** Sentinel meaning "no group id assigned yet; fetch the next one from the db". */
  private static final int UNASSIGNED_GROUP_ID = -1;

  private File mInputFolder;
  private File mIndexFolder;
  private File mTempFolder;

  /**
   * This is so you can inject your own creator, the default is SURF, but there
   * is a GPLed SIFT creator that I provide as well
   */
  private InterestPointDAO mDAO;
  private KClusterer mClusterer;

  /** Group id for this run; {@link #UNASSIGNED_GROUP_ID} until one is supplied or fetched. */
  private int mGroupId = UNASSIGNED_GROUP_ID;

  /** Sampling rate passed through to the vocabulary tree construction. */
  private int mSamplingRate = 1;

  /**
   * The constructor just validates that all the conditions are met for being
   * able to complete a run on the images in a particular folder.
   *
   * @param inputFolder - Folder with the images (must exist and be readable)
   * @param indexFolder - Folder to be used for storing the indexes (must exist and be writable)
   * @param tempFolder - Scratch folder for temporary files (must exist and be writable)
   * @param ipDAO - DAO used to create and read interest points
   * @param clusterer - Clusterer used when building the vocabulary tree
   * @param groupId - Group id to use, or -1 to fetch the next free one from the db when {@link #run()} is called
   * @param samplingRate - Sampling rate used when building the vocabulary tree
   * @throws IOException if any of the three folders fails its validation
   * @throws ConfigurationException declared for the static configuration initialisation
   */
  public VisualSearchIndexBuilder(String inputFolder, String indexFolder, String tempFolder, InterestPointDAO ipDAO, KClusterer clusterer, int groupId, int samplingRate) throws IOException, ConfigurationException {
    mInputFolder = new File(inputFolder);
    if ( !mInputFolder.exists() || !mInputFolder.canRead() )
      throw new IOException("Unable to read from input folder: " + mInputFolder.getAbsolutePath());
    mIndexFolder = new File(indexFolder);
    // Validate the index folder itself; the original re-checked the input folder here by mistake.
    if ( !mIndexFolder.exists() || !mIndexFolder.canWrite() )
      throw new IOException("Invalid index folder: " + mIndexFolder.getAbsolutePath());
    mTempFolder = new File(tempFolder);
    if ( !mTempFolder.exists() || !mTempFolder.canWrite() )
      throw new IOException("Invalid temp folder: " + mTempFolder.getAbsolutePath());

    mDAO = ipDAO;
    mClusterer = clusterer;

    mGroupId = groupId;

    mSamplingRate = samplingRate;

    //inits the static config
    Config.getConfiguration();
  }

  /**
   * Creates the actual index. This might take some time.
   * <p>
   * If no group id was supplied and none can be fetched from the db, the run
   * is aborted after logging an error.
   */
  public void run(){
    logger.info("Starting run...");

    fetchNextGroupId();
    if ( mGroupId == UNASSIGNED_GROUP_ID ) {
      // fetchNextGroupId() has already logged the underlying SQL failure.
      logger.error("Unable to determine a group id, aborting run");
      return;
    }

    ThreadedInterestPointGenerator factory = new ThreadedInterestPointGenerator(mInputFolder,mGroupId,mDAO);
    factory.start();
    //TODO: This stop might accidentally kill the last couple of points that need to be inserted if the start() polls at an unfortunate time
    factory.stop();

    logger.info("Interest points created, moving onto building the vocabulary tree");

    // The tree is cached in the temp folder so that a later run can reuse it.
    File treeFile = new File(mTempFolder,"tree.bin");
    KMeansTree tree = null;
    if ( !treeFile.exists() ){
      tree = VocabTreeManager.makeSampledTree(Integer.MAX_VALUE,mClusterer,mGroupId,mSamplingRate,mDAO);
      tree.resetBreadthList();
      SerializationUtils.saveObject(tree,treeFile);
    } else {
      tree = VocabTreeManager.loadVocabTree(treeFile.getAbsolutePath());
    }
    logger.info("Vocab tree created, moving onto building the index");

    InterestPointCache fullCache = new DirectDBInterestPointCache(mGroupId,Integer.MAX_VALUE,mDAO);
    L1ImageWordIndex index = new L1ImageWordIndex(tree,fullCache);
    File indexFile = new File(mTempFolder,"index.bin");
    SerializationUtils.saveObject(index,indexFile);

    logger.info("Big index created, moving onto building the queryable index");
    tree.resetBreadthList();

    // These objects are constructed only for their side effects and then discarded;
    // presumably the constructors write the queryable form into mIndexFolder - TODO confirm.
    new QueryTree(tree,mIndexFolder);
    System.out.println("index loaded");
    new WordInvertedIndex(index,mIndexFolder);

    logger.info("Done with index creation, nuking temp files");

    // NOTE: despite the log message above, temp file deletion is currently disabled,
    // so tree.bin and index.bin stay in the temp folder (and tree.bin is reused next run).
    //treeFile.delete();
    //indexFile.delete();

    logger.info("All done!");
  }

  /**
   * Fetches the next group id from the db, basically creates a new slate for this run.
   * Does nothing when a group id has already been supplied. On an SQL failure the
   * error is logged and the group id is left unassigned.
   */
  private void fetchNextGroupId(){
    if ( mGroupId != UNASSIGNED_GROUP_ID ) return;

    // The connection is left open on purpose: its ownership lies with DBConnectionManager.
    java.sql.Connection conn = DBConnectionManager.getConnection();
    java.sql.PreparedStatement stmt = null;
    ResultSet rs = null;
    try {
      stmt = conn.prepareStatement("select max(group_id) from " + mDAO.getTablePrefix() + "files");
      rs = stmt.executeQuery();
      if ( rs.next() ) {
        // NOTE(review): for an empty table max() yields SQL NULL, which getInt reports
        // as 0, so the first generated id is presumably 1 rather than 0 - confirm intent.
        mGroupId = rs.getInt(1) + 1;
      } else {
        mGroupId = 0;
      }
    } catch (SQLException e) {
      logger.fatal(e,e);
    } finally {
      closeQuietly(rs,stmt);
    }
  }

  /**
   * Closes the given JDBC resources, logging (rather than propagating) any failure
   * so that cleanup never masks the outcome of the query itself.
   */
  private static void closeQuietly(ResultSet rs, java.sql.Statement stmt){
    if ( rs != null ){
      try {
        rs.close();
      } catch (SQLException e) {
        logger.warn("Unable to close result set",e);
      }
    }
    if ( stmt != null ){
      try {
        stmt.close();
      } catch (SQLException e) {
        logger.warn("Unable to close statement",e);
      }
    }
  }

  /**
   * Command line entry point.
   * <p>
   * Expected arguments: {@code image_input_folder temp_folder output_folder sift|surf
   * kmeans|kmeansforest [group_id] [samplingRate]}. Anything other than "sift" selects
   * SURF and anything other than "kmeans" selects the k-means forest clusterer.
   *
   * @param args the command line arguments (5 to 7 entries)
   */
  public static void main(String args[]){
    // group_id and samplingRate are optional, so 5, 6 or 7 arguments are all valid.
    if ( args.length < 5 || args.length > 7 ){
      System.out.println("Usage: ant builder -Dargs=\"image_input_folder temp_folder output_folder sift|surf kmeans|kmeansforest [group_id] [samplingRate]\"");
      System.exit(0);
    }
    String sourceDirectory = args[0];
    String tempDirectory = args[1];
    String indexDirectory = args[2];
    InterestPointDAO dao = "sift".equals(args[3]) ? new SIFTInterestPointDAO() : new SURFInterestPointDAO();
    KClusterer clusterer = "kmeans".equals(args[4]) ? new KMeansClusterer() : new KMeansForestClusterer();
    int groupId = UNASSIGNED_GROUP_ID;
    if ( args.length > 5 ){
      groupId = Integer.parseInt(args[5]);
    }
    int samplingRate = 1;
    if ( args.length > 6 ){
      samplingRate = Integer.parseInt(args[6]);
    }
    System.out.println("Run information");
    System.out.println("tempDirectory = " + tempDirectory);
    System.out.println("indexDirectory = " + indexDirectory);
    System.out.println("sourceDirectory = " + sourceDirectory);
    System.out.println("dao = " + dao.getClass());
    System.out.println("clusterer = " + clusterer.getClass());
    System.out.println("groupId = " + groupId);
    System.out.println("samplingRate = " + samplingRate);
    try {
      VisualSearchIndexBuilder builder = new VisualSearchIndexBuilder(sourceDirectory, indexDirectory, tempDirectory, dao, clusterer, groupId, samplingRate);
      builder.run();
    } catch (ConfigurationException e) {
      e.printStackTrace();
    } catch (IOException e) {
      e.printStackTrace();
    }
    System.exit(0);
  }
}
// java2s.com  | Contact Us | Privacy Policy
// Copyright 2009 - 12 Demo Source and Support. All rights reserved.
// All other trademarks are property of their respective owners.