com.seajas.search.profiler.service.repository.RepositoryService.java Source code

Java tutorial

Introduction

Here is the source code for com.seajas.search.profiler.service.repository.RepositoryService.java

Source

/**
 * Copyright (C) 2013 Seajas, the Netherlands.
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 3, as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
package com.seajas.search.profiler.service.repository;

import com.mongodb.BasicDBObject;
import com.mongodb.DBObject;
import com.mongodb.MongoException;
import com.mongodb.gridfs.GridFS;
import com.mongodb.gridfs.GridFSDBFile;
import com.seajas.search.bridge.jms.model.CompositeEntry;
import com.seajas.search.bridge.jms.model.state.CompositeState;
import java.util.ArrayList;
import java.util.Date;
import java.util.EnumSet;
import java.util.List;
import java.util.Map;
import java.util.concurrent.atomic.AtomicLong;
import org.apache.commons.lang.StringEscapeUtils;
import org.bson.types.ObjectId;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.dao.DataAccessException;
import org.springframework.data.domain.Sort;
import org.springframework.data.mongodb.MongoDbFactory;
import org.springframework.data.mongodb.core.DocumentCallbackHandler;
import org.springframework.data.mongodb.core.MongoTemplate;
import org.springframework.data.mongodb.core.index.Index;
import org.springframework.data.mongodb.core.query.Criteria;
import org.springframework.data.mongodb.core.query.Order;
import org.springframework.data.mongodb.core.query.Query;
import org.springframework.stereotype.Service;

import static org.springframework.data.mongodb.core.query.Criteria.where;

/**
 * MongoDB-based repository service.
 *
 * <p>Provides paged lookup, counting, deletion, content retrieval and bulk processing of
 * {@link CompositeEntry} documents stored in the configured default collection, together with
 * the removal / retrieval of their associated GridFS content.</p>
 *
 * @author Jasper van Veghel <jasper@seajas.com>
 */
@Service
public class RepositoryService {
    /**
     * The page size that is applied when a caller does not specify one.
     */
    private static final Integer MAX_ACCEPTABLE_RESULT_SIZE = 1000;

    /**
     * Indexes are only (re-)ensured at start-up while the store holds at most this many documents.
     */
    private static final Long MAX_INDEX_CREATION_COUNT = 1000L;

    /**
     * The document fields on which an ascending index is ensured.
     */
    private static final String[] INDEXES = { "currentState", "failureState",

            "source.id", "source.collection",
            // "source.resultParameters", // XXX: Don't index this, as it's inefficient and doesn't really work

            "originalContent.hostname", "originalContent.dateSubmitted",

            "modifiedContent.dateSubmitted",

            "enricherDocument._id" };

    /**
     * The states in which an entry is eligible for processing by {@link #processResources}.
     * Never modified after initialization.
     */
    private static final EnumSet<CompositeState> PROCESSABLE_STATES = EnumSet.of(CompositeState.Content,
            CompositeState.CompletedDocument, CompositeState.InitialDocument);

    /**
     * The logger.
     */
    private static final Logger logger = LoggerFactory.getLogger(RepositoryService.class);

    /**
     * The MongoDB template.
     */
    private final MongoTemplate mongoTemplate;

    /**
     * The default collection.
     */
    @Value("${bridged.project.mongo.db.collection}")
    private String defaultCollection;

    /**
     * The GridFS store.
     */
    private final GridFS gridFs;

    /**
     * Create the service, set up the GridFS store and ensure the indexes on small stores.
     *
     * @param dbFactory the factory from which the database backing the GridFS store is obtained
     * @param mongoTemplate the template used for all document operations
     */
    @Autowired
    public RepositoryService(final MongoDbFactory dbFactory, final MongoTemplate mongoTemplate) {
        this.mongoTemplate = mongoTemplate;
        this.gridFs = new GridFS(dbFactory.getDb());

        // Initialize the indexes only if there aren't too many documents in the store yet
        //
        // NOTE(review): the count and the indexes target the collection mapped for CompositeEntry, whereas
        // the query methods below use defaultCollection - confirm that both resolve to the same collection

        long currentCount = mongoTemplate.count(new Query(), CompositeEntry.class);

        if (currentCount <= MAX_INDEX_CREATION_COUNT) {
            for (String index : INDEXES) {
                mongoTemplate.indexOps(CompositeEntry.class).ensureIndex(new Index().on(index, Order.ASCENDING));
            }
        }
    }

    /**
     * Retrieve a paged list of all resources within the repository, most recently submitted first.
     *
     * <p>Only entries in the {@code CompletedDocument} or {@code InitialDocument} state are considered.</p>
     *
     * @param collection the source collection to match, or {@code null} for any
     * @param sourceId the source ID to match, or {@code null} for any
     * @param taxonomyMatch the original content hostname to match, or {@code null} for any
     * @param startDate the inclusive lower bound on the submission date, or {@code null} for none
     * @param endDate the exclusive upper bound on the submission date, or {@code null} for none
     * @param pagerStart the number of results to skip; {@code null} is treated as 0
     * @param pagerResults the maximum number of results to return; {@code null} is treated as
     *        {@code MAX_ACCEPTABLE_RESULT_SIZE}
     * @param parameters the source result parameters to match, or {@code null} for none
     * @return RepositoryResult
     */
    public RepositoryResult findResources(final String collection, final Integer sourceId,
            final String taxonomyMatch, final Date startDate, final Date endDate, final Integer pagerStart,
            final Integer pagerResults, final Map<String, String> parameters) {
        Query query = createQuery(false, collection, sourceId, taxonomyMatch, startDate, endDate, null, parameters);

        query.with(new Sort(Sort.Direction.DESC, "originalContent.dateSubmitted"));

        logger.info("About to count the number of results - which can potentially take a while - query = {}",
                query);

        // First perform a count

        Long totalResults = mongoTemplate.count(query, defaultCollection);

        logger.info("Counted {} result(s) to be retrieved from the storage back-end", totalResults);

        // Then add paging parameters to the query, falling back to defaults instead of failing on null

        final Integer effectiveStart = pagerStart != null ? pagerStart : Integer.valueOf(0);
        final Integer effectiveResults = pagerResults != null ? pagerResults : MAX_ACCEPTABLE_RESULT_SIZE;

        query.skip(effectiveStart);
        query.limit(effectiveResults);

        // And build up the result; the list is sized by what was actually retrieved rather than by the
        // requested page size, so an oversized page request doesn't translate into an oversized allocation

        List<CompositeEntry> entries = mongoTemplate.find(query, CompositeEntry.class, defaultCollection);
        List<RepositoryResource> results = new ArrayList<RepositoryResource>(entries.size());

        for (CompositeEntry entry : entries) {
            results.add(new RepositoryResource(entry.getOriginalContent().getUri().toString(),
                    entry.getSource().getCollection(), entry.getSource().getId(),
                    entry.getOriginalContent().getHostname(), entry.getOriginalContent().getDateSubmitted(),
                    entry.getId().toString()));
        }

        return new RepositoryResult(effectiveStart, effectiveResults, totalResults, results);
    }

    /**
     * Delete all matching resources - in any state - from the repository, including the GridFS files
     * referenced by their original and modified content.
     *
     * <p>The GridFS files are removed first, after which the documents themselves are removed using the
     * same query; the two steps are not performed atomically.</p>
     *
     * @param collection the source collection to match, or {@code null} for any
     * @param sourceId the source ID to match, or {@code null} for any
     * @param url the original content URI to match, or {@code null} for any
     * @param startDate the inclusive lower bound on the submission date, or {@code null} for none
     * @param endDate the exclusive upper bound on the submission date, or {@code null} for none
     * @return {@code true} on success, {@code false} if a runtime exception occurred (which is logged)
     */
    @SuppressWarnings("deprecation")
    public boolean deleteResources(final String collection, final Integer sourceId, final String url,
            final Date startDate, final Date endDate) {
        try {
            Query query = createQuery(true, collection, sourceId, null, startDate, endDate, url, null);

            logger.info("Removing entries from the repository");

            // First delete all GridFS references

            mongoTemplate.executeQuery(query, defaultCollection, new DocumentCallbackHandler() {
                @Override
                public void processDocument(final DBObject dbObject) throws MongoException, DataAccessException {
                    removeGridFsContent(dbObject, "originalContent");
                    removeGridFsContent(dbObject, "modifiedContent");
                }
            });

            // Then delete the repository documents

            mongoTemplate.remove(query, defaultCollection);

            return true;
        } catch (RuntimeException e) {
            logger.error("Unable to remove the given resource(s) from the repository", e);

            return false;
        }
    }

    /**
     * Remove the GridFS file referenced by the {@code _id} of the given content sub-document, if any.
     *
     * @param document the repository document being deleted
     * @param contentField the name of the sub-document holding the GridFS reference
     */
    private void removeGridFsContent(final DBObject document, final String contentField) {
        Object content = document.get(contentField);

        if (content == null) {
            return;
        }

        ObjectId contentId = (ObjectId) ((DBObject) content).get("_id");

        // A content sub-document without an ID has nothing stored in GridFS, so there is nothing to remove

        if (contentId != null) {
            gridFs.remove(contentId);
        }
    }

    /**
     * Count the resources matching the given criteria and determine the submission date of the most
     * recently submitted one.
     *
     * <p>Only entries in the {@code CompletedDocument} or {@code InitialDocument} state are considered.</p>
     *
     * @param collection the source collection to match, or {@code null} for any
     * @param sourceId the source ID to match, or {@code null} for any
     * @param taxonomyMatch the original content hostname to match, or {@code null} for any
     * @return RepositoryStatistic holding the count and the latest submission date ({@code null} if there
     *         are no matching entries)
     */
    public RepositoryStatistic countResources(final String collection, final Integer sourceId,
            final String taxonomyMatch) {
        Query query = createQuery(false, collection, sourceId, taxonomyMatch, null, null, null, null);

        Long totalResults = mongoTemplate.count(query, defaultCollection);
        Date lastAccessed = null;

        if (totalResults > 0) {
            // Without an explicit sort the entry returned by findOne is arbitrary, so order by submission
            // date to make sure the reported date belongs to the most recent entry

            query.with(new Sort(Sort.Direction.DESC, "originalContent.dateSubmitted"));

            CompositeEntry latestEntry = mongoTemplate.findOne(query, CompositeEntry.class, defaultCollection);

            // The entry may have been removed between the count and the lookup

            if (latestEntry != null && latestEntry.getOriginalContent() != null) {
                lastAccessed = latestEntry.getOriginalContent().getDateSubmitted();
            }
        }

        return new RepositoryStatistic(totalResults, lastAccessed);
    }

    /**
     * Retrieve the original content of the resource with the given entry ID.
     *
     * @param entryId the (hexadecimal) object ID of the entry
     * @return RepositoryContent wrapping the GridFS input stream and the media type of the original content
     * @throws IllegalArgumentException if no entry exists for the given ID, if the entry has no original
     *         content, or if the original content is not present in GridFS
     */
    public RepositoryContent getResource(final String entryId) {
        CompositeEntry entry = mongoTemplate.findById(new ObjectId(entryId), CompositeEntry.class,
                defaultCollection);

        if (entry == null) {
            throw new IllegalArgumentException("No resource could be found for identifier " + entryId);
        }
        if (entry.getOriginalContent() == null) {
            throw new IllegalArgumentException("No original content was set for resource identified by " + entryId);
        }

        GridFSDBFile dbFile = gridFs.find(entry.getOriginalContent().getId());

        if (dbFile == null) {
            throw new IllegalArgumentException(
                    "No stored content could be found for resource identified by " + entryId);
        }

        return new RepositoryContent(dbFile.getInputStream(), entry.getOriginalContent().getMediaType());
    }

    /**
     * Feed a range of matching resources - in any state - through the given processor.
     *
     * <p>Entries whose current state is not one of {@code Content}, {@code CompletedDocument} or
     * {@code InitialDocument} are skipped (but still count towards the range).</p>
     *
     * @param collection the source collection to match, or {@code null} for any
     * @param sourceId the source ID to match, or {@code null} for any
     * @param taxonomyMatch the original content hostname to match, or {@code null} for any
     * @param url the original content URI to match, or {@code null} for any
     * @param startDate the inclusive lower bound on the submission date, or {@code null} for none
     * @param endDate the exclusive upper bound on the submission date, or {@code null} for none
     * @param parameters the source result parameters to match, or {@code null} for none
     * @param rangeStart the number of matching entries to skip; {@code null} is treated as 0
     * @param rangeEnd the (exclusive) end of the range, or {@code null} to process until the end
     * @param processor the processor invoked with the ID and hostname of each eligible entry
     * @return always {@code true}; failures surface as exceptions
     */
    public boolean processResources(final String collection, final Integer sourceId, final String taxonomyMatch,
            final String url, final Date startDate, final Date endDate, final Map<String, String> parameters,
            final Integer rangeStart, final Integer rangeEnd, final RepositoryProcessor processor) {
        Query query = createQuery(true, collection, sourceId, taxonomyMatch, startDate, endDate, url, parameters);

        // Only retrieve the fields that are actually used by the callback below
        //
        // NOTE(review): the hostname is read from 'element' here, whereas the rest of this class reads it
        // from 'originalContent' - confirm that 'element.hostname' is (still) part of the stored documents

        query.fields().include("_id");
        query.fields().include("currentState");
        query.fields().include("element.hostname");

        // Running counter of the processed entries, used for logging only

        final AtomicLong currentResult = new AtomicLong(0L);

        // Then skip to the start of the range and get going

        final int effectiveStart = rangeStart != null ? rangeStart : 0;

        query.skip(effectiveStart);

        if (rangeEnd != null) {
            query.limit(rangeEnd - effectiveStart);
        }

        if (logger.isInfoEnabled()) {
            logger.info(String.format("Processing ranges %d to %s of (unknown) results through the given processor",
                    effectiveStart, rangeEnd != null ? rangeEnd.toString() : "end"));
        }

        mongoTemplate.executeQuery(query, defaultCollection, new DocumentCallbackHandler() {
            @Override
            public void processDocument(final DBObject dbObject) throws MongoException, DataAccessException {
                ObjectId id = (ObjectId) dbObject.get("_id");
                CompositeState currentState = CompositeState.valueOf((String) dbObject.get("currentState"));

                if (!PROCESSABLE_STATES.contains(currentState)) {
                    if (logger.isDebugEnabled()) {
                        logger.debug("Skipping over element with ID '" + id + "' and current state '" + currentState
                                + "'");
                    }

                    return;
                }

                String hostname = (String) ((BasicDBObject) dbObject.get("element")).get("hostname");

                if (logger.isInfoEnabled()) {
                    logger.info("Processing re-indexing entry " + currentResult.getAndIncrement()
                            + " / (unknown) with ID '" + id + "' and hostname '" + hostname + "'");
                }

                processor.process(id, hostname);
            }
        });

        return true;
    }

    /**
     * Create a query given any or all of the provided parameters; every {@code null} argument is left out
     * of the resulting criteria.
     *
     * @param allStates when {@code false}, only entries in the {@code CompletedDocument} or
     *        {@code InitialDocument} state are matched
     * @param collection the source collection to match
     * @param sourceId the source ID to match
     * @param taxonomyMatch the original content hostname to match
     * @param startDate the inclusive lower bound on the original content's submission date
     * @param endDate the exclusive upper bound on the original content's submission date
     * @param url the original content URI to match
     * @param parameters the source result parameters to match, keyed by parameter name
     * @return Query
     * @throws IllegalStateException if a parameter name contains a '.' or '$' character
     */
    private Query createQuery(final Boolean allStates, final String collection, final Integer sourceId,
            final String taxonomyMatch, final Date startDate, final Date endDate, final String url,
            final Map<String, String> parameters) {
        Query query = new Query();

        if (!allStates) {
            query.addCriteria(
                    new Criteria().orOperator(where("currentState").is(CompositeState.CompletedDocument.name()),
                            where("currentState").is(CompositeState.InitialDocument.name())));
        }

        if (collection != null) {
            query.addCriteria(where("source.collection").is(collection));
        }
        if (sourceId != null) {
            query.addCriteria(where("source.id").is(sourceId));
        }
        if (taxonomyMatch != null) {
            query.addCriteria(where("originalContent.hostname").is(taxonomyMatch));
        }

        if (startDate != null || endDate != null) {
            // Both bounds go into a single criteria, as they apply to the same field

            Criteria dateCriteria = where("originalContent.dateSubmitted");

            if (startDate != null) {
                dateCriteria = dateCriteria.gte(startDate);
            }
            if (endDate != null) {
                dateCriteria = dateCriteria.lt(endDate);
            }

            query.addCriteria(dateCriteria);
        }

        if (parameters != null && !parameters.isEmpty()) {
            for (Map.Entry<String, String> parameter : parameters.entrySet()) {
                final String parameterName = parameter.getKey();

                // Reject names that would alter the field path or introduce an operator

                if (parameterName.contains(".") || parameterName.contains("$")) {
                    throw new IllegalStateException("Can't add criteria for parameter '" + parameterName
                            + "' because it contains invalid characters");
                }

                query.addCriteria(
                        where("source.resultParameters." + StringEscapeUtils.escapeJavaScript(parameterName))
                                .is(parameter.getValue()));
            }
        }

        if (url != null) {
            query.addCriteria(where("originalContent.uri").is(url));
        }

        return query;
    }
}