// TaxonomyDb.java :  » Portal » Open-Portal » com » sun » portal » search » db » Java Open Source
//
// Java Open Source » Portal » Open Portal
// Open Portal » com » sun » portal » search » db » TaxonomyDb.java
/*
 * Copyright 2001 Sun Microsystems, Inc.  All rights reserved.
 * PROPRIETARY/CONFIDENTIAL.  Use of this product is subject to license terms.
 */

package com.sun.portal.search.db;

import com.sun.portal.search.rdm.*;
import com.sun.portal.search.soif.*;
import com.sun.portal.search.util.*;
import com.sun.portal.log.common.PortalLogger;

import java.util.*;
import java.util.logging.Logger;
import java.util.logging.Level;
import java.io.*;

// XXX this is not finished - want to move taxonomy to db
// (preferably with a stock standard db interface)

/**
 *  Support for RDM Taxonomy requests
 *
 *  <pre>RDM-Query-Language=Taxonomy-Basic
 *
 *  Scope = descendant Classification | descendant/N Classification
 *          children Classification   | anklebiter Classification
 *          advise-doc-count/N Classification
 *
 *    where
 *      Classification = ROOT | Classification-Id
 *
 *  Example RDM messages
 *  --------------------
 *
 *  To send the entire taxonomy (node and everything below):
 *
 *    @RDMHEADER { -
 *    RDM-Version{x}:  1.0
 *    RDM-Type{x}:  taxonomy-description-request
 *    RDM-Query-Language{x}:  taxonomy-basic
 *    }
 *
 *    @RDMQUERY { -
 *    Scope{x}:  descendant ROOT
 *    }
 *
 *  To send only the children (i.e., the node and the one level directly below it):
 *
 *    @RDMHEADER { -
 *    RDM-Version{x}:  1.0
 *    RDM-Type{x}:  taxonomy-description-request
 *    RDM-Query-Language{x}:  taxonomy-basic
 *    }
 *
 *    @RDMQUERY { -
 *    Scope{x}:  anklebiter ROOT
 *    }
 *
 *    OR
 *
 *    @RDMHEADER { -
 *    RDM-Version{x}:  1.0
 *    RDM-Type{x}:  taxonomy-description-request
 *    RDM-Query-Language{x}:  taxonomy-basic
 *    }
 *
 *    @RDMQUERY { -
 *    Scope{x}:  anklebiter Education/Training:Internal Training Classes
 *    }
 *
 *  Added in compass 302...
 *
 *  To advise of classified doc counts, e.g., when the db stats don't match verity:
 *
 *    @RDMHEADER { -
 *    RDM-Version{x}:  1.0
 *    RDM-Type{x}:  taxonomy-description-request
 *    RDM-Query-Language{x}:  taxonomy-basic
 *    }
 *
 *    @RDMQUERY { -
 *    Scope{x}:  advise-doc-count/N Internal Engineering
 *    }
 *
 */
public class TaxonomyDb extends SearchOnlyDb {

    // XXX This will extend IndexedSOIFDb once tax is a real DB

    // classification stats support
    // NOTE(review): none of the CLASS_* constants are referenced in the visible
    // part of this class; presumably they are used by callers elsewhere - confirm.
    public static final String CLASS_KEY      = "statistics:classification";
    public static final String CLASS_NAME      = "s_class";
    public static final String CLASS_NODES      = "s_nodes";
    public static final String CLASS_THIS      = "s_nodecnt";
    public static final String CLASS_IDXBATCH      = "s_idxbatch";

    // Underlying search engine db; open() and close() delegate to it.
    NovaDb searchengine = new NovaDb();

    // The parsed taxonomy, assigned by loadTaxonomy(); null until the first load.
    // NOTE(review): this field is per-instance while the three bookkeeping fields
    // below are static (shared by all instances) - confirm that is intended.
    RDMTaxonomy theTax = null;
    // Last-modified time of the taxonomy file as recorded by the last load.
    static Date theTax_lmt = null;
    // Point in time after which refreshTaxonomy() reloads the taxonomy.
    static Date theTax_refresh = null;
    // Refresh interval in seconds (multiplied by 1000 where it is applied).
    static int theTax_refresh_rate = (60 * 5);  // default: 5 minutes

    // Supported query languages
    public static final String QL_TAX_BASIC = "taxonomy-basic";
    public static final String QL_SEARCH = "search";
    public static final String QL_CLASSIFICATION = "classification";

    /**
     * Parsed form of a taxonomy-basic query, consumed by
     * getTaxBasicResults(), checkTaxonomy() and QLTaxTraverse.
     */
    class QLTaxQuery {
  // Id of the classification to start from; looked up in the loaded taxonomy.
  String class_id;
  // Number of levels below the start node to include; -1 means unlimited.
  int depth;
  // When true the traversal writes no classification SOIFs (RD retrieval is unfinished).
  boolean retrieve_rd;
  // When true the request is an advise-doc-count request rather than a description request.
  boolean advise_doc_count;
  // Advised classified-document count; only read for advise-doc-count requests.
  int doc_count;
    }

    /**
     * Holder for the scope of a search-language query.
     * NOTE(review): not referenced anywhere in the visible part of this class.
     */
    class QLSearchQuery {
  // Raw scope string of the query.
  String scope;
    }


    /**
     * Answers a taxonomy-basic request against the already-parsed taxonomy.
     *
     * <p>The classification named by {@code qry.class_id} is looked up first.
     * If it is unknown the response is marked "0 results" and the method
     * returns without sending anything itself. An advise-doc-count request is
     * handed to {@link #checkTaxonomy}. Otherwise the subtree below the
     * classification is walked in order, the matching classification SOIFs are
     * collected in a temporary buffer (so they can be counted before the
     * header goes out), and finally the header and the buffered results are
     * sent.
     *
     * @param req the incoming RDM request (query scope, view settings, logging)
     * @param res the response whose header and output stream are written
     * @param qry the parsed taxonomy query
     * @throws Exception if the taxonomy walk or any of the stream writes fail
     */
    protected void getTaxBasicResults(RDMRequest req, RDMResponse res, QLTaxQuery qry) throws Exception {

        SearchLogger.getLogger().log(Level.FINEST, "PSSH_CSPSB0066", req.getQuery().getScope());

        // Work against the taxonomy that was parsed at load time.
        final RDMTaxonomy taxonomy = theTax;

        // Locate the node the query is about.
        final RDMClassification start = taxonomy.find(qry.class_id);
        if (start == null) {
            // log - Classification Mismatch: {0}
            SearchLogger.getLogger().log(Level.FINE, "PSSH_CSPSB0067", qry.class_id);
            res.getHeader().setResponseInterpret("0 results");
            return;
        }

        // Doc count advice is processed and logged entirely by checkTaxonomy().
        if (qry.advise_doc_count) {
            checkTaxonomy(req, res, qry, start);
            return;
        }

        // XXX This is where we would return either RDs or Categories depending
        // on the requested rdm type, but we only support cats at the moment.

        // XXX special case for taxonomy bwd compat:
        // if view-hits was not given, default to all (instead of 10, etc)
        RDMView view = new RDMView(req);
        if (req.getQuery().getViewHits() == null) {
            view.hits.max = RDMViewHits.RDMVIEWHITS_MAX;
        }

        // XXX Results are buffered so they can be counted before being sent (not for RDs).
        SOIFBuffer buffered = new SOIFBuffer();
        SOIFOutputStream out = new SOIFOutputStream(buffered);

        // The taxonomy's own SOIF leads the output unless RDs were asked for.
        if (!qry.retrieve_rd) {
            out.write(taxonomy.getSOIF());
        }

        // Restrict the emitted attributes when the view names some.
        if (view.attr != null) {
            SearchLogger.getLogger().log(Level.FINE, "PSSH_CSPSB0068", view.attr);
            out.setAllowed(view.attr);
        }

        // Configure the walker that collects the results.
        QLTaxTraverse walker = new QLTaxTraverse();
        walker.t = taxonomy;
        walker.qry = qry;
        walker.ss = out;
        walker.n = 0;
        walker.total = 0;
        walker.max = view.hits.max;
        walker.depth_start = taxonomy.depth(start);
        // A query depth of -1 means unlimited; otherwise it is relative to the start node.
        walker.depth_max = (qry.depth == -1) ? -1 : qry.depth + walker.depth_start;

        SearchLogger.getLogger().log(Level.FINER, "PSSH_CSPSB0078",
            new Object[] { Integer.toString(walker.depth_start), Integer.toString(walker.depth_max) });
        start.apply(RDM.RDM_TAX_INORDER, walker);

        // Header for taxonomy description results
        res.getHeader().setResponseInterpret(
            walker.n + " results out of " + walker.n + " hits across " + walker.total + " documents");

        // Log message - written to disk later
        req.logRDM("xfer=" + walker.n + " scope=\"" + req.getQuery().getScope() + "\"");

        // Header first, then the buffered results.
        res.sendHeader();
        res.getOutputStream().write(buffered.toByteArray());
    }


    /**
     * Applies an advised classified-document count to a classification.
     *
     * <p>When the advised count ({@code qry.doc_count}) differs from the count
     * currently held by {@code cp}, the node's own count is replaced and the
     * difference is added to the descendant-document count of the node and of
     * every ancestor, updating each node's SOIF as it goes. The walk stops
     * with a warning as soon as an adjusted count would become negative;
     * nodes adjusted before that point stay adjusted.
     *
     * <p>XXX This is a weak correction - exact numbers for every node would
     * need real searches, which is a job for a command line utility. The new
     * stats are not persisted (XXX they should be saved in a taxonomy db along
     * with the taxonomy). No result body is sent back for this request; only
     * an RDM log entry is recorded.
     *
     * <p>NB (from the original notes): the caller is expected to hold the
     * taxonomy write lock; nothing in this method acquires one.
     *
     * @param req the request, used for the query scope and RDM logging
     * @param res the response (not modified here)
     * @param qry the parsed query carrying the advised count and class id
     * @param cp  the classification the advice refers to
     * @throws Exception if updating a classification SOIF fails
     */
    protected void checkTaxonomy(RDMRequest req, RDMResponse res, QLTaxQuery qry, RDMClassification cp) throws Exception {

        SearchLogger.getLogger().log(Level.FINE, "PSSH_CSPSB0079",
            new Object[] { Integer.toString(cp.getNumDocs()), Integer.toString(qry.doc_count),
            qry.class_id});

        final int previous = cp.getNumDocs();
        final int advised = qry.doc_count;
        final int delta = advised - previous;

        if (delta != 0) {
            // Correct the node's own classified-document count first.
            cp.setNumDocs(advised);
            cp.getSOIF().replace(RDM.A_RDM_NCATDOC, "" + cp.getNumDocs());

            // Then shift the descendant-document count of this node and all its
            // ancestors. (Re-running the load walker would also work, but this
            // touches far fewer nodes.)
            for (RDMClassification node = cp; node != null; node = node.getParent()) {
                int adjusted = node.getNumDescDocs() + delta;
                if (adjusted < 0) { // sanity check
                    // log - Count went -ve while adjusting classified doc counts
                    SearchLogger.getLogger().log(Level.WARNING, "PSSH_CSPSB0080");
                    break;
                }
                node.setNumDescDocs(adjusted);
                node.getSOIF().replace(RDM.A_RDM_NSUBDOC, "" + adjusted);
            }

            // XXXXXX dbaccess.saveCategorizedCount(null, qry.class_id, new_count);
        }

        // Log message - written to disk later
        req.logRDM("taxonomy advise - old count="
            + previous + " scope=\"" + req.getQuery().getScope() + "\"");
    }


    /**
     * Taxonomy walker used by getTaxBasicResults(): visits classifications,
     * filters them by depth and by the maximum number of hits, and writes the
     * SOIF of each accepted classification to {@link #ss}.
     */
    class QLTaxTraverse implements RDMCallback {

        /** Taxonomy being walked; used to compute node depth. */
        RDMTaxonomy t;
        /** Query that triggered the walk. */
        QLTaxQuery qry;
        /** Destination for accepted classification SOIFs. */
        SOIFOutputStream ss;
        /** Deepest depth to accept (-1 = unlimited) and depth of the start node. */
        int depth_max, depth_start;
        /** Accepted count, accepted-count limit (0 = no limit), and visited count. */
        int n, max, total;

        /**
         * Handles one visited classification.
         *
         * @param o the visited node; must be an {@code RDMClassification}
         * @throws Exception if writing the node's SOIF fails
         */
        public void callback(Object o) throws Exception {
            final RDMClassification node = (RDMClassification) o;
            int nodeDepth = 0;

            // Every visited node is counted, accepted or not. The taxonomy
            // root is not skipped.
            total++;

            // Depth filter - the depth is only computed when a limit is set,
            // so it stays 0 (and is logged as 0) for unlimited walks.
            if (depth_max != -1) {
                nodeDepth = t.depth(node);
                if (nodeDepth > depth_max) {
                    return;
                }
            }

            // Hit limit filter.
            final boolean limitReached = (max != 0) && (n >= max);
            if (limitReached) {
                return;
            }

            final String id = node.getId();
            SearchLogger.getLogger().log(Level.FINE, "PSSH_CSPSB0081",
                new Object[] { id, Integer.toString(nodeDepth) });

            if (!qry.retrieve_rd) {
                ss.write(node.getSOIF());
            } else {
                // XXX not finished - RD retrieval only logs the node
                SearchLogger.getLogger().log(Level.FINE, "PSSH_CSPSB0069", id);
            }
            n++;
        }
    }


    /**
     * Parses the taxonomy file and preloads it into {@code theTax}.
     *
     * <p>After parsing, the descendant-document counts are summed bottom-up
     * (AddCatContent, POSTORDER) and the counts and child names are copied
     * into each classification's SOIF (UpdateClassSOIF, INORDER). Finally the
     * file's last-modified time and the next refresh time are recorded.
     *
     * <p>The load is skipped when a taxonomy is already loaded and the file's
     * last-modified time equals the recorded one. A last-modified time of 0
     * (which {@code File.lastModified()} reports for a missing or unreadable
     * file) never takes the shortcut, so such a file still fails loudly below.
     * Note that init() resets the recorded time to the epoch before calling
     * this method, so the shortcut only applies to direct callers.
     *
     * @param taxfn path of the taxonomy file
     * @throws Exception if the file cannot be opened or parsed, or if one of
     *                   the taxonomy walks fails
     */
    protected void loadTaxonomy(String taxfn) throws Exception {

        Date lmt = new Date(new File(taxfn).lastModified());

        // Shortcut reload if nothing changed. Dates must be compared by value:
        // a reference comparison against the freshly created lmt is never true.
        if (theTax != null && lmt.getTime() != 0L && lmt.equals(theTax_lmt)) {
            SearchLogger.getLogger().log(Level.FINER, "PSSH_CSPSB0070");
            return;
        }

        SearchLogger.getLogger().log(Level.FINE, "PSSH_CSPSB0071");
        SOIFInputStream ss = null;
        try {
            ss = new SOIFInputStream(taxfn);
        }
        catch (Exception e) {
            SearchLogger.getLogger().log(Level.WARNING, "PSSH_CSPSB0082", taxfn);
            throw e;
        }
        // NOTE(review): ss is never closed in this method; if SOIFInputStream
        // holds an open file it should be released once parsing is done - confirm
        // which close/finish call the class offers.

        SearchLogger.getLogger().log(Level.FINEST, "PSSH_CSPSB0072", taxfn);
        try {
            theTax = new RDMTaxonomy(ss);      // Parse taxonomy
        }
        catch (Exception e) {
            SearchLogger.getLogger().log(Level.WARNING, "PSSH_CSPSB0073", taxfn);
            // Same message as before, but keep the parser's exception as the cause.
            throw new Exception("Failed to parse taxonomy " + taxfn, e);
        }

        // Load/calculate the classified docs info for each classification
        SearchLogger.getLogger().log(Level.FINE, "PSSH_CSPSB0074");
        //XXX s = RDM_Database_ReadCategorized(null);
        SOIF s = new SOIF();
        theTax.apply(RDM.RDM_TAX_POSTORDER, new AddCatContent(s));

        // Add the sub-node and doc/sub-doc info to all of the Class SOIFs
        SearchLogger.getLogger().log(Level.FINE, "PSSH_CSPSB0075");
        theTax.apply(RDM.RDM_TAX_INORDER, new UpdateClassSOIF());

        // Set some properties about the preloaded taxonomy.
        // The rate is in seconds; multiply as long so large rates cannot overflow int.
        theTax_lmt = lmt;
        theTax_refresh = new Date(new Date().getTime() + theTax_refresh_rate * 1000L);
    }

    /**
     * Taxonomy callback that sums classified-document counts up the tree.
     *
     * <p>For every non-root classification it adds the node's own document
     * count to its descendant-document count and then adds that total to the
     * parent's descendant-document count. It must be applied POSTORDER so
     * that children are summed before their parents.
     *
     * <p>The per-node count was meant to come from
     * RDM_Database_CategorizedCount(); that call is currently disabled, so
     * the node's existing count is used as is.
     */
    class AddCatContent implements RDMCallback {

        /** Stats record handed in by the caller; when null nothing is summed. */
        SOIF stats_soif;

        AddCatContent(SOIF s) {
            stats_soif = s;
        }

        /**
         * Sums the counts for one visited classification.
         *
         * @param o the visited node; must be an {@code RDMClassification}
         */
        public void callback(Object o) throws Exception {
            final RDMClassification node = (RDMClassification) o;

            // ROOT (depth 0) is skipped (XXX ???), and without stats all
            // counts simply stay as they are.
            if (node.getDepth() == 0 || stats_soif == null) {
                return;
            }

            //node.setNumDocs(RDM_Database_CategorizedCount(stats_soif, node.getId()));

            // Fold this node's own docs into its descendant total ...
            final int subtotal = node.getNumDescDocs() + node.getNumDocs();
            node.setNumDescDocs(subtotal);

            // ... and pass the total on to the parent, if there is one.
            final RDMClassification parent = node.getParent();
            if (parent != null) {
                parent.setNumDescDocs(parent.getNumDescDocs() + node.getNumDescDocs());
            }
        }
    }

    /**
     * Taxonomy callback that copies the counters held by each non-root
     * classification (number of descendants, classified docs, descendant
     * docs) and the short names of its children into the classification's
     * SOIF.
     */
    class UpdateClassSOIF implements RDMCallback {

        /**
         * Refreshes the SOIF of one visited classification.
         *
         * @param o the visited node; must be an {@code RDMClassification}
         */
        public void callback(Object o) throws Exception {
            final RDMClassification node = (RDMClassification) o;

            if (node.getDepth() == 0) { // skip ROOT XXX ???
                return;
            }

            SearchLogger.getLogger().log(Level.FINE, "PSSH_CSPSB0076",
                new Object[] { Integer.toString(node.getNumDescendant()), Integer.toString(node.getNumDocs()),
                Integer.toString(node.getNumDescDocs()), node.getId() });

            // Counters first.
            node.getSOIF().replace(RDM.A_RDM_NSUBCAT, String.valueOf(node.getNumDescendant()));
            node.getSOIF().replace(RDM.A_RDM_NCATDOC, String.valueOf(node.getNumDocs()));
            node.getSOIF().replace(RDM.A_RDM_NSUBDOC, String.valueOf(node.getNumDescDocs()));

            if (node.getChildren() == null) {
                return;
            }

            // Then one child attribute value per child: the part of the child's
            // id after the last ':' (the whole id when it contains no ':').
            for (int i = 0; i < node.nChildren(); i++) {
                final String childId = node.nthChild(i).getId();
                final int cut = childId.lastIndexOf(':');
                final String leaf = childId.substring(cut + 1);
                node.getSOIF().replace(RDM.A_RDM_CHILD, leaf, i);
            }
        }
    }


    /**
     * Opens the underlying search database and then makes sure a taxonomy is
     * loaded by calling refreshTaxonomy().
     *
     * @param st      token passed through to the search engine
     * @param rootdir db home dir
     * @param dbname  name of database from root.db (e.g., default); also
     *                handed to refreshTaxonomy()
     * @param rw      SOIFDb.WRITER or SOIFDb.WRCREAT or SOIFDb.READER
     * @param mode    Unix mode
     * @throws RDMException if the search engine cannot be opened or the
     *                      taxonomy cannot be loaded
     */
    public void open(SToken st, String rootdir, String dbname, int rw, int mode) throws RDMException {
  // Open the engine first; the taxonomy is only (re)loaded when that succeeded.
  searchengine.open(st, rootdir, dbname, rw, mode);
  refreshTaxonomy(dbname);
    }

    /**
     * Reloads the taxonomy when none is loaded yet or when the refresh time
     * has passed, then schedules the next refresh
     * {@code theTax_refresh_rate} seconds from now.
     *
     * <p>The method is synchronized on this instance.
     * NOTE(review): the refresh bookkeeping fields are static, so this lock
     * does not serialize reloads across several instances - confirm whether
     * that matters.
     *
     * @param dbname database name, passed on to init()
     * @throws RDMException if the reload fails
     */
    protected synchronized void refreshTaxonomy(String dbname) throws RDMException {
        Date now = new Date();
        // A missing refresh time is treated like an expired one rather than
        // dereferenced: theTax can be set while theTax_refresh is still null.
        boolean due = theTax == null || theTax_refresh == null || theTax_refresh.before(now);
        if (due) {
            // Reload the taxonomy now...
            init(dbname);
            // The rate is in seconds; multiply as long so large rates cannot overflow int.
            theTax_refresh = new Date(now.getTime() + theTax_refresh_rate * 1000L);
        }
    }

    /**
     * Reads the taxonomy settings from the search configuration and loads the
     * taxonomy from disk.
     *
     * <p>The recorded last-modified and refresh times are reset to the epoch
     * first. The taxonomy file name (SearchConfig.TAX) is required; the
     * refresh rate in seconds (SearchConfig.TAX_REFRESH) is optional and
     * replaces the current rate when present.
     *
     * @param dbname currently ignored (XXX will be honoured when the
     *               taxonomy becomes a real db)
     * @throws RDMException if no taxonomy file is configured, if the
     *                      configured refresh rate is not an integer, or if
     *                      loading the taxonomy fails
     */
    public void init(String dbname) throws RDMException {

        // XXX ignoring tax dbname for now - will honour when tax becomes a real db
        String taxfn = SearchConfig.getValue(SearchConfig.TAX);
        String taxrr = SearchConfig.getValue(SearchConfig.TAX_REFRESH);

        theTax_lmt = new Date(0);
        theTax_refresh = new Date(0);

        // Locate taxonomy
        if (taxfn == null) {
            SearchLogger.getLogger().log(Level.WARNING, "PSSH_CSPSB0083", SearchConfig.TAX);
            throw new RDMException("Failed to initialize");
        }

        if (taxrr != null) {
            try {
                theTax_refresh_rate = Integer.parseInt(taxrr.trim());
            }
            catch (NumberFormatException nfe) {
                // Report a bad setting through the declared exception type
                // instead of letting an unchecked exception escape.
                throw new RDMException("Invalid taxonomy refresh rate: " + taxrr);
            }
        }

        // Load the taxonomy from disk into RDMTaxonomy object
        try {
            loadTaxonomy(taxfn);
        }
        catch (Exception e) {
            SearchLogger.getLogger().log(Level.WARNING, "PSSH_CSPSB0084", taxfn);
            // Some exceptions carry no message; fall back to their string form
            // so the RDMException always states a reason.
            String reason = e.getMessage();
            throw new RDMException(reason != null ? reason : e.toString()); // XXX
        }

        SearchLogger.getLogger().log(Level.FINE, "PSSH_CSPSB0077", taxfn);
    }


    /**
     * Closes the underlying search engine db by delegating to it
     * (described by the original author as closing db and index extents).
     * The loaded taxonomy is not touched.
     *
     * @param st token passed through to the search engine
     * @throws RDMException declared; presumably raised by the search engine
     *                      when the close fails
     */
    public void close(SToken st) throws RDMException {
  searchengine.close(st);
    }

    /**
     * Query language support
     * @param st
     * @throws RDMException
     * @return Set of supported query languages (String)
     */
    // public Set getSupportedQueryLanguages(SToken st) throws RDMException {
    //  throw new RDMException("not implemented");
    //}

    /**
     * Key-based fetch - not implemented in this class; every call throws.
     *
     * <p>The original documentation describes the intended contract as:
     * returns 0 if present, or Db.DB_NOTFOUND or Db.DB_KEYEMPTY (for a
     * missing recno key); if result is null, simply checks for presence of
     * the key in the db.
     *
     * @param st     unused
     * @param key    unused
     * @param result unused
     * @param flags  unused
     * @param t      unused
     * @throws RDMException always, with the message "not implemented"
     * @return never returns normally
     */
    public int fetch(SToken st, Datum key, Datum result, int flags, RDMTransaction t) throws RDMException {
  throw new RDMException("not implemented");
    }

    /**
     * Retrieve an RD from the database, filtered by view - not implemented
     * in this class; every call throws.
     *
     * @throws RDMException always, with the message "not implemented"
     */
    public SOIF fetch(SToken st, String url, Set view, int flags, RDMTransaction t) throws RDMException {
  throw new RDMException("not implemented");
    }

    /**
     * Retrieve an RD by url (no view filter) - not implemented in this
     * class; every call throws.
     *
     * @throws RDMException always, with the message "not implemented"
     */
    public SOIF fetch(SToken st, String url, int flags, RDMTransaction t) throws RDMException {
  throw new RDMException("not implemented");
    }

    /**
     * Recover the db - must be run stand alone (ie, no one else has the db open).
     * In this class the method body is empty, so the call does nothing.
     *
     * @param dbhome unused
     * @param fatal  unused
     */
    public void recover(String dbhome, boolean fatal) throws RDMException {
    }

}
// java2s.com  | Contact Us | Privacy Policy
// Copyright 2009 - 12 Demo Source and Support. All rights reserved.
// All other trademarks are property of their respective owners.