/*
* Copyright 2001-2006 C:1 Financial Services GmbH
*
* This software is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License Version 2.1, as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA
*/
package de.finix.contelligent.search.engine;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import org.apache.lucene.document.Document;
import de.finix.contelligent.CallData;
import de.finix.contelligent.ComponentManager;
import de.finix.contelligent.ComponentPath;
import de.finix.contelligent.Session;
import de.finix.contelligent.core.ContelligentImpl;
import de.finix.contelligent.core.security.ContelligentSecurityManager;
import de.finix.contelligent.logging.LoggingService;
import de.finix.contelligent.xml.elements.IndexBuilderElement;
import de.finix.contelligent.xml.elements.IndexBuilderFilterElement;
/**
 * Collects the components below a root path, optionally narrows them down
 * with the configured {@link CrawlerFilter}s and appends the Lucene documents
 * created for each remaining component to a {@link LuceneIndex}.
 */
class Crawler {
    static final org.apache.log4j.Logger log = LoggingService.getLogger(Crawler.class);

    /** Root of the subtree that is searched for components. */
    private final ComponentPath root;

    /**
     * The key set is handed to the component manager as type filter; the whole
     * map is also passed to the rendering document factory.
     */
    private final Map includes = new HashMap();

    /** Never <code>null</code>; handed to the rendering document factory. */
    private final Map categoryValues;

    /** Never <code>null</code>; handed to the rendering document factory. */
    private final Map renderParameters;

    /** Index that receives the created documents. */
    private final LuceneIndex index;

    /**
     * Filter configurations ({@link IndexBuilderFilterElement}) applied to the
     * collected paths. Normalised to an empty collection by the constructor.
     */
    Collection filters;

    /**
     * Creates a crawler from an index builder configuration element.
     *
     * @param index
     *            the index to fill
     * @param element
     *            supplies the root directory, includes, category values,
     *            render parameters and filters
     */
    public Crawler(LuceneIndex index, IndexBuilderElement element) {
        this(new ComponentPath(element.getDir()), element.getIncludes(), element.getCategoryValues(),
                element.getRenderParameters(), index, element.getFilters());
    }

    /**
     * Creates a crawler from explicit settings.
     *
     * @param root
     *            root of the subtree to crawl
     * @param includes
     *            map whose keys are the component types to include; if
     *            <code>null</code> or empty, only
     *            <code>contelligent.website.Page</code> is included
     * @param categoryValues
     *            may be <code>null</code>, which is treated as an empty map
     * @param renderParameters
     *            may be <code>null</code>, which is treated as an empty map
     * @param index
     *            the index to fill
     * @param filters
     *            collection of {@link IndexBuilderFilterElement}; may be
     *            <code>null</code>, which is treated as "no filters"
     */
    protected Crawler(ComponentPath root, Map includes, Map categoryValues, Map renderParameters, LuceneIndex index,
            Collection filters) {
        this.root = root;
        this.index = index;
        // A missing filter list is handled like an empty one, in line with
        // the null handling of the maps below.
        this.filters = (filters != null) ? filters : Collections.EMPTY_LIST;
        if (includes == null || includes.isEmpty()) {
            this.includes.put("contelligent.website.Page", "");
        } else {
            this.includes.putAll(includes);
        }
        this.categoryValues = (categoryValues != null) ? categoryValues : Collections.EMPTY_MAP;
        this.renderParameters = (renderParameters != null) ? renderParameters : Collections.EMPTY_MAP;
    }

    /**
     * Runs one crawl: opens a session for the index user, collects and filters
     * the component paths below the root and appends the documents created
     * for them to the index.
     * <p>
     * A failure for a single component is logged and the crawl continues with
     * the next one. Any other failure is logged as error and ends the crawl;
     * no exception is propagated to the caller. The session is invalidated in
     * every case.
     */
    public void run() {
        Session session = null;
        try {
            final ComponentManager cm = ContelligentImpl.getInstance().getRootComponentManager();
            session = ContelligentImpl.getInstance().beginSession(ContelligentSecurityManager.getIndexUser(), cm);
            CallData callData = ContelligentImpl.getInstance().createCallData(session);
            Collection paths = cm.getComponentsInSubtreeFilteredByType(root, includes.keySet());
            final Collection filteredPaths = filterPaths(paths, callData);
            // The index decides whether rendered output or raw component
            // data is turned into documents.
            final LuceneDocumentFactory documentFactory;
            if (index.isRender()) {
                documentFactory = new RenderingDocumentFactory(categoryValues, renderParameters, includes, cm,
                        session, callData, ContelligentImpl.getInstance().getCategoryManager());
            } else {
                documentFactory = new RawDocumentFactory(cm);
            }
            index.apply(new LuceneIndexAppender() {
                public void perform(LuceneIndexAppenderAdapter adapter) {
                    Iterator iterator = filteredPaths.iterator();
                    while (iterator.hasNext()) {
                        ComponentPath componentPath = (ComponentPath) iterator.next();
                        try {
                            Iterator documents = documentFactory.createDocuments(componentPath).iterator();
                            while (documents.hasNext()) {
                                Document document = (Document) documents.next();
                                adapter.add(document);
                            }
                        } catch (Exception e) {
                            // Best effort: one broken component must not
                            // abort the indexing of all the others.
                            log.warn("Failed to add component " + componentPath + " to index (" + e.getMessage() + ")");
                            if (log.isDebugEnabled()) {
                                log.debug("Failed to add component to index", e);
                            }
                        }
                    }
                }
            });
        } catch (Exception e) {
            log.error("run() failed", e);
        } finally {
            if (session != null) {
                ContelligentImpl.getInstance().invalidateSession(session);
            }
        }
    }

    /**
     * Applies all configured filters to the given paths.
     * <p>
     * The result is the union of the paths accepted by the individual
     * filters. A filter that raises a {@link FilterException} is logged and
     * contributes nothing to the result.
     *
     * @param paths
     *            the candidate {@link ComponentPath}s
     * @param callData
     *            call data handed to every filter
     * @return <code>paths</code> itself if no filters are configured,
     *         otherwise a new set holding every path accepted by at least
     *         one filter
     */
    private Collection filterPaths(Collection paths, CallData callData) {
        // 'filters' is package-visible and not final, so guard against null
        // here as well as in the constructor.
        if (filters == null || filters.isEmpty()) {
            return paths;
        }
        HashSet filteredPaths = new HashSet();
        Iterator f = filters.iterator();
        while (f.hasNext()) {
            IndexBuilderFilterElement filterElement = (IndexBuilderFilterElement) f.next();
            try {
                CrawlerFilter filter = FilterEngine.getInstance().getFilterInstance(filterElement);
                filteredPaths.addAll(applyFilterToPaths(filter, paths, filterElement, callData));
            } catch (FilterException e) {
                log.warn("CrawlerFilter Implementation could not be resolved '" + filterElement.getImpl() + "'", e);
            }
        }
        if (log.isDebugEnabled()) {
            log.debug("filterPaths() - filtered paths: '" + paths + "' => '" + filteredPaths + "'");
        }
        return filteredPaths;
    }

    /**
     * Collects the paths for which the given filter returns <code>true</code>.
     *
     * @param filter
     *            the filter to ask for every path
     * @param pathsToFilter
     *            the candidate {@link ComponentPath}s; not modified
     * @param filterConfig
     *            configuration element passed through to the filter
     * @param callData
     *            call data passed through to the filter
     * @return a new set with the accepted paths
     * @throws FilterException
     *             if the filter fails for one of the paths
     */
    private Collection applyFilterToPaths(CrawlerFilter filter, Collection pathsToFilter,
            IndexBuilderFilterElement filterConfig, CallData callData) throws FilterException {
        HashSet result = new HashSet();
        Iterator i = pathsToFilter.iterator();
        while (i.hasNext()) {
            ComponentPath path = (ComponentPath) i.next();
            if (filter.filter(path, filterConfig, callData)) {
                result.add(path);
            }
        }
        return result;
    }
}
|