/*
* Copyright 2001 Sun Microsystems, Inc. All rights reserved.
* PROPRIETARY/CONFIDENTIAL. Use of this product is subject to license terms.
*/
package com.sun.portal.search.db;
import com.sun.portal.search.rdm.*;
import com.sun.portal.search.soif.*;
import com.sun.portal.search.util.*;
import com.sun.portal.log.common.PortalLogger;
import java.util.*;
import java.util.logging.Logger;
import java.util.logging.Level;
import java.io.*;
// XXX this is not finished - want to move taxonomy to db
// (preferably with a stock standard db interface)
/**
* Support for RDM Taxonomy requests
*
* <pre>RDM-Query-Language=Taxonomy-Basic
*
* Scope = descendant Classification | descendant/N Classification
*       | children Classification | anklebiter Classification
*       | advise-doc-count/N Classification
*
* where
* Classification = ROOT | Classification-Id
*
* Example RDM messages
* --------------------
*
* To send the entire taxonomy (node and everything below):
*
* @RDMHEADER { -
* RDM-Version{x}: 1.0
* RDM-Type{x}: taxonomy-description-request
* RDM-Query-Language{x}: taxonomy-basic
* }
*
* @RDMQUERY { -
* Scope{x}: descendant ROOT
* }
*
* To send only the children (i.e., the node and the one level directly below it):
*
* @RDMHEADER { -
* RDM-Version{x}: 1.0
* RDM-Type{x}: taxonomy-description-request
* RDM-Query-Language{x}: taxonomy-basic
* }
*
* @RDMQUERY { -
* Scope{x}: anklebiter ROOT
* }
*
* OR
*
* @RDMHEADER { -
* RDM-Version{x}: 1.0
* RDM-Type{x}: taxonomy-description-request
* RDM-Query-Language{x}: taxonomy-basic
* }
*
* @RDMQUERY { -
* Scope{x}: anklebiter Education/Training:Internal Training Classes
* }
*
* Added in compass 302...
*
* To advise of classified doc counts, e.g., when the db stats don't match with verity:
*
* @RDMHEADER { -
* RDM-Version{x}: 1.0
* RDM-Type{x}: taxonomy-description-request
* RDM-Query-Language{x}: taxonomy-basic
* }
*
* @RDMQUERY { -
* Scope{x}: advise-doc-count/N Internal Engineering
* }
* </pre>
*/
public class TaxonomyDb extends SearchOnlyDb {
// XXX This will extend IndexedSOIFDb once tax is a real DB
// classification stats support
// NOTE(review): none of the CLASS_* constants is referenced in the code visible here;
// presumably they name the classification statistics record and its attributes - confirm against callers.
public static final String CLASS_KEY = "statistics:classification";
public static final String CLASS_NAME = "s_class";
public static final String CLASS_NODES = "s_nodes";
public static final String CLASS_THIS = "s_nodecnt";
public static final String CLASS_IDXBATCH = "s_idxbatch";
// Delegate that open() and close() forward to.
NovaDb searchengine = new NovaDb();
// Parsed taxonomy held by this instance; stays null until loadTaxonomy() assigns it.
RDMTaxonomy theTax = null;
// Last-modified time of the taxonomy file as recorded by loadTaxonomy(); static, so shared by all instances.
static Date theTax_lmt = null;
// Point in time after which refreshTaxonomy() reloads the taxonomy; static, so shared by all instances.
static Date theTax_refresh = null;
// Refresh interval in seconds (it is multiplied by 1000 wherever a millisecond value is needed);
// init() overrides it from SearchConfig.TAX_REFRESH when that setting is present.
static int theTax_refresh_rate = (60 * 5); // default: 5 minutes
// Supported query languages
// NOTE(review): these names are not referenced in the code visible here.
public static final String QL_TAX_BASIC = "taxonomy-basic";
public static final String QL_SEARCH = "search";
public static final String QL_CLASSIFICATION = "classification";
/**
* Taxonomy processing support: the parsed form of a taxonomy-basic query
* as consumed by getTaxBasicResults() and checkTaxonomy().
*/
class QLTaxQuery {
// Classification id handed to RDMTaxonomy.find() to locate the start node.
String class_id;
// How many levels below the start node to return; -1 means unlimited.
int depth;
// When true, neither the taxonomy SOIF nor the classification SOIFs are written (RD retrieval is unfinished).
boolean retrieve_rd;
// When true, the request is a doc-count advisory and is routed to checkTaxonomy().
boolean advise_doc_count;
// The advised document count; read by checkTaxonomy() only.
int doc_count;
}
/**
* Search query support: holds the scope string of a search query.
* NOTE(review): not referenced in the code visible here.
*/
class QLSearchQuery {
// Scope of the search query.
String scope;
}
/**
 * Answers a taxonomy-basic request from the already-parsed taxonomy.
 * <p>
 * The classification named by {@code qry.class_id} is looked up first. If it
 * is unknown, the response header is marked "0 results" and nothing else is
 * done. If the query is a doc-count advisory it is handed to
 * {@link #checkTaxonomy}. Otherwise the sub-tree below the classification is
 * walked in order, the matching classifications are collected in a temporary
 * buffer (so they can be counted before the header goes out), and the header
 * followed by the buffered results is sent.
 *
 * @param req the request being served; its query scope is logged and its view settings are honoured
 * @param res the response whose header and output stream are written
 * @param qry the parsed taxonomy query
 * @throws Exception if the traversal or any of the writes fails
 */
protected void getTaxBasicResults(RDMRequest req, RDMResponse res, QLTaxQuery qry) throws Exception {
    SearchLogger.getLogger().log(Level.FINEST, "PSSH_CSPSB0066", req.getQuery().getScope());

    // Work against the taxonomy that has already been parsed.
    final RDMTaxonomy taxonomy = theTax;

    // Locate the classification the request is scoped to.
    final RDMClassification startNode = taxonomy.find(qry.class_id);
    if (startNode == null) {
        // log - Classification Mismatch: {0}
        SearchLogger.getLogger().log(Level.FINE, "PSSH_CSPSB0067", qry.class_id);
        res.getHeader().setResponseInterpret("0 results");
        return;
    }

    // Doc-count advice is processed and logged entirely by checkTaxonomy().
    if (qry.advise_doc_count) {
        checkTaxonomy(req, res, qry, startNode);
        return;
    }

    // XXX This is where we would return either RDs or Categories depending
    // on the requested rdm type, but we only support cats at the moment.

    // Taxonomy description request handling
    RDMView view = new RDMView(req);

    // XXX special case for taxonomy bwd compat:
    // when view-hits is not given, default to all (instead of 10, etc)
    if (null == req.getQuery().getViewHits()) {
        view.hits.max = RDMViewHits.RDMVIEWHITS_MAX;
    }

    // XXX Temporary buffer so the results can be counted before they are sent (XXX - not for RDs)
    SOIFBuffer buffered = new SOIFBuffer();
    SOIFOutputStream out = new SOIFOutputStream(buffered);

    // The taxonomy's own SOIF leads the output
    if (!qry.retrieve_rd) {
        out.write(taxonomy.getSOIF());
    }

    // Restrict the output to the requested View-Attributes, if any
    if (view.attr != null) {
        SearchLogger.getLogger().log(Level.FINE, "PSSH_CSPSB0068", view.attr);
        out.setAllowed(view.attr);
    }

    // Walk the taxonomy and collect the results
    QLTaxTraverse walker = new QLTaxTraverse();
    walker.t = taxonomy;
    walker.qry = qry;
    walker.ss = out;
    walker.n = 0;
    walker.total = 0;
    walker.max = view.hits.max;
    walker.depth_start = taxonomy.depth(startNode);
    // -1 means unlimited; otherwise the limit is relative to the start node
    walker.depth_max = (qry.depth == -1) ? -1 : qry.depth + walker.depth_start;

    SearchLogger.getLogger().log(Level.FINER, "PSSH_CSPSB0078",
            new Object[] { Integer.toString(walker.depth_start), Integer.toString(walker.depth_max) });
    startNode.apply(RDM.RDM_TAX_INORDER, walker);

    // Header for taxonomy description results
    String interpretation =
            walker.n + " results out of " + walker.n + " hits across " + walker.total + " documents";
    res.getHeader().setResponseInterpret(interpretation);

    // Log message - written to disk later
    req.logRDM("xfer=" + walker.n + " scope=\"" + req.getQuery().getScope() + "\"");

    // Header first, then the buffered results
    res.sendHeader();
    res.getOutputStream().write(buffered.toByteArray());
}
/**
 * Applies an advisory correction to the classified-document count of one
 * classification (the stats are kept in a rather unreliable way).
 * <p>
 * When the advised count differs from the stored one, the node's own count
 * is replaced and the descendant-document count of the node and of each of
 * its ancestors is shifted by the difference. The walk up the parent chain
 * stops early, with a warning, as soon as a count would become negative;
 * nodes adjusted before that point keep their new values.
 * <p>
 * XXX This is pretty weak - exact numbers would need a search per node,
 * which is a job for a command line utility.
 * <p>
 * NOTE(review): the original comment states that the caller already holds
 * the taxonomy write lock; no locking is visible in this method - confirm.
 *
 * @param req the request; used for the scope string and the RDM log line
 * @param res the response; not used - no results are sent back at the moment
 * @param qry the parsed query carrying the advised {@code doc_count}
 * @param cp  the classification the advice applies to
 * @throws Exception if updating a classification or its SOIF fails
 */
protected void checkTaxonomy(RDMRequest req, RDMResponse res, QLTaxQuery qry, RDMClassification cp) throws Exception {
    SearchLogger.getLogger().log(Level.FINE, "PSSH_CSPSB0079",
            new Object[] { Integer.toString(cp.getNumDocs()), Integer.toString(qry.doc_count),
                qry.class_id });

    final int previous = cp.getNumDocs();
    final int advised = qry.doc_count;
    final int delta = advised - previous;

    if (delta != 0) {
        // Correct the node's own count first...
        cp.setNumDocs(advised);
        cp.getSOIF().replace(RDM.A_RDM_NCATDOC, String.valueOf(cp.getNumDocs()));

        // ...then shift the descendant totals of the node and every ancestor.
        // (Re-running the tree load walker would also work, but this is cheaper.)
        for (RDMClassification node = cp; node != null; node = node.getParent()) {
            final int adjusted = node.getNumDescDocs() + delta;
            if (adjusted < 0) { // sanity check
                // log - Count went -ve while adjusting classified doc counts
                SearchLogger.getLogger().log(Level.WARNING, "PSSH_CSPSB0080");
                break;
            }
            node.setNumDescDocs(adjusted);
            node.getSOIF().replace(RDM.A_RDM_NSUBDOC, String.valueOf(adjusted));
        }

        // save the new stats
        // XXX this should be saved in a taxonomy db (along with the taxonomy)
        // XXXXXX dbaccess.saveCategorizedCount(null, qry.class_id, new_count);
    }

    // Header for advise-doc-count results
    // XXX not sending any results back for this at the moment
    //   sprintf(msg, "Advise doc count: old count = %d new count = %d for %s",
    //           old_count, new_count, qry.class_id);
    //   RDMHeader_SetResponseInterpret(res.header, msg);

    // Log message - written to disk later
    req.logRDM("taxonomy advise - old count="
            + previous + " scope=\"" + req.getQuery().getScope() + "\"");
}
/**
 * Traversal callback that writes each visited classification to a SOIF
 * stream, subject to a depth limit and a maximum number of results.
 * The fields are filled in by the caller before the traversal starts.
 */
class QLTaxTraverse implements RDMCallback {
    /** Taxonomy being walked; used to compute node depths. */
    RDMTaxonomy t;
    /** The query that triggered the traversal. */
    QLTaxQuery qry;
    /** Destination for the classification SOIFs. */
    SOIFOutputStream ss;
    /** Deepest level to emit, or -1 for no limit. */
    int depth_max;
    /** Depth of the node the traversal started from. */
    int depth_start;
    /** Number of classifications emitted so far. */
    int n;
    /** Maximum number of classifications to emit; 0 disables the limit. */
    int max;
    /** Number of nodes visited, including the ones that were skipped. */
    int total;

    /**
     * Visits one classification: counts it, then emits it unless it lies
     * below the depth limit or the result limit has been reached.
     *
     * @param o the visited node; must be an {@code RDMClassification}
     * @throws Exception if writing the classification fails
     */
    public void callback(Object o) throws Exception {
        final RDMClassification node = (RDMClassification) o;
        int level = 0;
        total++;

        // The taxonomy root is deliberately not skipped:
        //   if (c == t.root)
        //       return;

        // Too deep? The depth is only computed when a limit is in force.
        if (depth_max != -1) {
            level = t.depth(node);
            if (level > depth_max) {
                return;
            }
        }

        // Enough results already?
        if (max != 0 && n >= max) {
            return;
        }

        // Generate the Classification information that we want
        final String id = node.getId();
        SearchLogger.getLogger().log(Level.FINE, "PSSH_CSPSB0081", new Object[] { id, Integer.toString(level) });
        if (!qry.retrieve_rd) {
            ss.write(node.getSOIF());
        } else {
            // nothing is written for RD retrieval - XXX not finished
            SearchLogger.getLogger().log(Level.FINE, "PSSH_CSPSB0069", id);
        }
        n++;
    }
}
/**
 * Loads and parses the taxonomy file and installs it as the preloaded
 * taxonomy of this instance.
 * <p>
 * The reload is skipped when this instance already holds a taxonomy and the
 * file's last-modified time equals the one recorded by the previous load.
 * After parsing, the document counts are summed up the tree and copied into
 * each classification's SOIF. The new taxonomy replaces the old one only
 * once all of that has succeeded, so a failed reload leaves the previously
 * loaded taxonomy in place.
 *
 * @param taxfn path of the taxonomy file
 * @throws Exception if the file cannot be opened (the original exception is
 *         rethrown), if it cannot be parsed (the parse failure is attached
 *         as the cause), or if one of the tree passes fails
 */
protected void loadTaxonomy(String taxfn) throws Exception {
    Date lmt = new Date(new File(taxfn).lastModified());

    // Shortcut the reload if nothing changed. The stamps are compared by
    // value: a reference comparison could never match the Date created just
    // above. lastModified() reports 0 for a missing or unreadable file, so a
    // zero stamp never counts as "unchanged" - fall through and let the open
    // below report the problem.
    if (theTax != null && lmt.getTime() != 0L && lmt.equals(theTax_lmt)) {
        SearchLogger.getLogger().log(Level.FINER, "PSSH_CSPSB0070");
        return;
    }
    SearchLogger.getLogger().log(Level.FINE, "PSSH_CSPSB0071");

    SOIFInputStream ss;
    try {
        ss = new SOIFInputStream(taxfn);
    }
    catch (Exception e) {
        SearchLogger.getLogger().log(Level.WARNING, "PSSH_CSPSB0082", taxfn);
        throw e;
    }

    SearchLogger.getLogger().log(Level.FINEST, "PSSH_CSPSB0072", taxfn);
    RDMTaxonomy parsed;
    try {
        parsed = new RDMTaxonomy(ss); // Parse taxonomy
    }
    catch (Exception e) {
        SearchLogger.getLogger().log(Level.WARNING, "PSSH_CSPSB0073", taxfn);
        // keep the parse failure as the cause so it is not lost
        throw new Exception("Failed to parse taxonomy " + taxfn, e);
    }
    // NOTE(review): the input stream is never closed here (the original
    // SOIFStream_Finish/fclose calls were not carried over) - confirm
    // whether SOIFInputStream needs an explicit close.

    // Load/calculate the classified docs info for each classification
    SearchLogger.getLogger().log(Level.FINE, "PSSH_CSPSB0074");
    //XXX s = RDM_Database_ReadCategorized(null);
    SOIF s = new SOIF();
    parsed.apply(RDM.RDM_TAX_POSTORDER, new AddCatContent(s));

    // Add the sub-node and doc/sub-doc info to all of the Class SOIFs
    SearchLogger.getLogger().log(Level.FINE, "PSSH_CSPSB0075");
    parsed.apply(RDM.RDM_TAX_INORDER, new UpdateClassSOIF());

    // Publish the fully prepared taxonomy and record its properties.
    theTax = parsed;
    theTax_lmt = lmt;
    // 1000L forces long arithmetic: seconds * 1000 overflows int for large refresh rates
    theTax_refresh = new Date(new Date().getTime() + theTax_refresh_rate * 1000L);
}
/**
 * Traversal callback that sums document counts up the taxonomy tree.
 * <p>
 * For every non-root node it adds the node's own document count to the
 * node's descendant-document count and then adds that total to the parent's
 * descendant-document count. The per-node lookup of the categorized count
 * (RDM_Database_CategorizedCount) is currently commented out, so only the
 * summing takes place.
 * <p>
 * Must be applied POSTORDER so children are summed before their parents.
 */
class AddCatContent implements RDMCallback {
    /** Statistics SOIF; when null the callback does nothing. */
    SOIF stats_soif;

    AddCatContent(SOIF stats) {
        this.stats_soif = stats;
    }

    /**
     * @param o the visited node; must be an {@code RDMClassification}
     * @throws Exception declared by the callback interface
     */
    public void callback(Object o) throws Exception {
        final RDMClassification node = (RDMClassification) o;

        // ROOT is skipped (XXX ???); without stats all counts simply stay zero
        if (node.getDepth() == 0 || stats_soif == null) {
            return;
        }

        //c.setNumDocs(RDM_Database_CategorizedCount(stats_soif, c.getId()));

        // fold the node's own documents into its descendant total...
        node.setNumDescDocs(node.getNumDescDocs() + node.getNumDocs());

        // ...and pass the total on to the parent, if there is one
        final RDMClassification parent = node.getParent();
        if (parent == null) {
            return;
        }
        parent.setNumDescDocs(parent.getNumDescDocs() + node.getNumDescDocs());
    }
}
/**
 * Traversal callback that copies the descendant count, the categorized
 * document count and the descendant-document count from the classification
 * structure into the classification's SOIF, and records the last id
 * component of each child as a child attribute.
 */
class UpdateClassSOIF implements RDMCallback {
    /**
     * @param o the visited node; must be an {@code RDMClassification}
     * @throws Exception declared by the callback interface
     */
    public void callback(Object o) throws Exception {
        final RDMClassification node = (RDMClassification) o;

        // skip ROOT XXX ???
        if (node.getDepth() == 0) {
            return;
        }

        SearchLogger.getLogger().log(Level.FINE, "PSSH_CSPSB0076",
                new Object[] { Integer.toString(node.getNumDescendant()), Integer.toString(node.getNumDocs()),
                    Integer.toString(node.getNumDescDocs()), node.getId() });

        // update the class soif
        node.getSOIF().replace(RDM.A_RDM_NSUBCAT, String.valueOf(node.getNumDescendant()));
        node.getSOIF().replace(RDM.A_RDM_NCATDOC, String.valueOf(node.getNumDocs()));
        node.getSOIF().replace(RDM.A_RDM_NSUBDOC, String.valueOf(node.getNumDescDocs()));

        if (node.getChildren() == null) {
            return;
        }

        // one child attribute per child, valued with the text after the last ':' of the child's id
        int idx = 0;
        while (idx < node.nChildren()) {
            final String childId = node.nthChild(idx).getId();
            final int lastColon = childId.lastIndexOf(':'); // -1 when absent, so the whole id is kept
            final String category = childId.substring(lastColon + 1);
            node.getSOIF().replace(RDM.A_RDM_CHILD, category, idx);
            idx++;
        }
    }
}
/**
 * Opens the search database and makes sure a taxonomy is loaded.
 * <p>
 * The open itself is delegated to the underlying search engine; afterwards
 * the taxonomy is (re)loaded if it is missing or due for a refresh.
 *
 * @param st      passed through to the search engine
 * @param rootdir db home dir
 * @param dbname  name of database from root.db (e.g., default)
 * @param rw      SOIFDb.WRITER or SOIFDb.WRCREAT or SOIFDb.READER
 * @param mode    Unix mode
 * @throws RDMException if the engine cannot be opened or the taxonomy cannot be loaded
 */
public void open(SToken st, String rootdir, String dbname, int rw, int mode) throws RDMException {
    // engine first, taxonomy second
    this.searchengine.open(st, rootdir, dbname, rw, mode);
    this.refreshTaxonomy(dbname);
}
/**
 * Reloads the taxonomy when none is loaded yet or the refresh deadline has
 * passed, then schedules the next refresh {@code theTax_refresh_rate}
 * seconds after the time this method was entered.
 * <p>
 * NOTE(review): the method synchronizes on this instance while the refresh
 * deadline is a static field - confirm that is sufficient if several
 * instances are in use.
 *
 * @param dbname passed through to {@link #init(String)}
 * @throws RDMException if the reload fails
 */
protected synchronized void refreshTaxonomy(String dbname) throws RDMException {
    Date now = new Date();
    // a missing deadline is treated like an expired one
    boolean due = theTax_refresh == null || theTax_refresh.before(now);
    if (theTax == null || due) {
        // Reload the taxonomy now...
        init(dbname);
        // 1000L forces long arithmetic: seconds * 1000 overflows int for large refresh rates
        theTax_refresh = new Date(now.getTime() + theTax_refresh_rate * 1000L);
    }
}
/**
 * Reads the taxonomy settings from the search configuration and loads the
 * taxonomy from disk.
 * <p>
 * The recorded last-modified stamp and the refresh deadline are reset to
 * the epoch first. The refresh rate is overridden when
 * {@code SearchConfig.TAX_REFRESH} is set.
 *
 * @param dbname currently ignored (XXX will be honoured when tax becomes a real db)
 * @throws RDMException if no taxonomy file is configured, if the configured
 *         refresh rate is not a valid integer, or if the taxonomy cannot be loaded
 */
public void init(String dbname) throws RDMException {
    // XXX ignoring tax dbname for now - will honour when tax becomes a real db
    String taxfn = SearchConfig.getValue(SearchConfig.TAX);
    String taxrr = SearchConfig.getValue(SearchConfig.TAX_REFRESH);
    theTax_lmt = new Date(0);
    theTax_refresh = new Date(0);

    // Load the taxonomy from disk into RDMTaxonomy object
    if (taxfn == null) { // Locate taxonomy
        SearchLogger.getLogger().log(Level.WARNING, "PSSH_CSPSB0083", SearchConfig.TAX);
        throw new RDMException("Failed to initialize");
    }

    if (taxrr != null) {
        try {
            theTax_refresh_rate = Integer.parseInt(taxrr.trim());
        }
        catch (NumberFormatException e) {
            // report a bad setting through the declared exception type
            // rather than letting an unchecked exception escape
            throw new RDMException("Invalid taxonomy refresh rate: " + taxrr);
        }
    }

    try {
        loadTaxonomy(taxfn);
    }
    catch (Exception e) {
        SearchLogger.getLogger().log(Level.WARNING, "PSSH_CSPSB0084", taxfn);
        // getMessage() may be null; fall back to the exception's own description
        String detail = e.getMessage();
        throw new RDMException(detail != null ? detail : e.toString()); // XXX
    }
    SearchLogger.getLogger().log(Level.FINE, "PSSH_CSPSB0077", taxfn);
}
/**
 * Closes the database by delegating to the underlying search engine.
 * The loaded taxonomy is left untouched.
 *
 * @param st passed through to the search engine
 * @throws RDMException if the search engine fails to close
 */
public void close(SToken st) throws RDMException {
    this.searchengine.close(st);
}
/**
* Query language support
* @param st
* @throws RDMException
* @return Set of supported query languages (String)
*/
// public Set getSupportedQueryLanguages(SToken st) throws RDMException {
// throw new RDMException("not implemented");
//}
/**
* Not implemented in this class: every call throws.
* <p>
* Contract described by the original comment (not realised here): returns 0 if present,
* or Db.DB_NOTFOUND or Db.DB_KEYEMPTY (for missing recno key); if result is null,
* simply checks for presence of key in db.
* @param st unused
* @param key unused
* @param result unused
* @param flags unused
* @param t unused
* @throws RDMException always, with the message "not implemented"
* @return never returns normally
*/
public int fetch(SToken st, Datum key, Datum result, int flags, RDMTransaction t) throws RDMException {
throw new RDMException("not implemented");
}
/**
* Retrieve RD from database, filtered by view.
* Not implemented in this class: every call throws.
* @param st unused
* @param url unused
* @param view unused
* @param flags unused
* @param t unused
* @throws RDMException always, with the message "not implemented"
* @return never returns normally
*/
public SOIF fetch(SToken st, String url, Set view, int flags, RDMTransaction t) throws RDMException {
throw new RDMException("not implemented");
}
/**
* Variant of fetch without a view argument.
* Not implemented in this class: every call throws.
* @param st unused
* @param url unused
* @param flags unused
* @param t unused
* @throws RDMException always, with the message "not implemented"
* @return never returns normally
*/
public SOIF fetch(SToken st, String url, int flags, RDMTransaction t) throws RDMException {
throw new RDMException("not implemented");
}
/**
* Recover the db - must be run stand alone (ie, no one else has the db open).
* This implementation does nothing: the body is empty.
* @param dbhome unused
* @param fatal unused
* @throws RDMException declared by the signature; never thrown by this implementation
*/
public void recover(String dbhome, boolean fatal) throws RDMException {
}
}
|