org.opencms.search.galleries.CmsGallerySearchIndex.java Source code

Java tutorial

Introduction

Here is the source code for org.opencms.search.galleries.CmsGallerySearchIndex.java

Source

/*
 * This library is part of OpenCms -
 * the Open Source Content Management System
 *
 * Copyright (c) Alkacon Software GmbH (http://www.alkacon.com)
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * For further information about Alkacon Software, please see the
 * company website: http://www.alkacon.com
 *
 * For further information about OpenCms, please see the
 * project website: http://www.opencms.org
 * 
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

package org.opencms.search.galleries;

import org.opencms.ade.galleries.shared.CmsGallerySearchScope;
import org.opencms.file.CmsObject;
import org.opencms.file.CmsPropertyDefinition;
import org.opencms.file.CmsResource;
import org.opencms.file.types.CmsResourceTypeXmlContainerPage;
import org.opencms.file.types.CmsResourceTypeXmlContent;
import org.opencms.file.types.CmsResourceTypeXmlPage;
import org.opencms.i18n.CmsLocaleManager;
import org.opencms.main.CmsException;
import org.opencms.main.CmsIllegalArgumentException;
import org.opencms.main.CmsLog;
import org.opencms.main.OpenCms;
import org.opencms.search.CmsSearchException;
import org.opencms.search.CmsSearchIndex;
import org.opencms.search.CmsSearchParameters;
import org.opencms.search.Messages;
import org.opencms.search.documents.I_CmsDocumentFactory;
import org.opencms.search.documents.I_CmsTermHighlighter;
import org.opencms.util.CmsUUID;

import java.util.ArrayList;
import java.util.List;
import java.util.Locale;

import org.apache.commons.logging.Log;
import org.apache.lucene.document.Document;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanFilter;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.FilterClause;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermsFilter;
import org.apache.lucene.search.TopDocs;

/**
 * Implements the search within a the gallery index.<p>
 * 
 * @since 8.0.0 
 */
public class CmsGallerySearchIndex extends CmsSearchIndex {

    /** The system galleries path. */
    public static final String FOLDER_SYSTEM_GALLERIES = "/system/galleries/";

    /** The system modules folder path. */
    public static final String FOLDER_SYTEM_MODULES = "/system/modules/";

    /** The advanced gallery index name. */
    public static final String GALLERY_INDEX_NAME = "Gallery Index";

    /** The gallery document type name for xml-contents. */
    public static final String TYPE_XMLCONTENT_GALLERIES = "xmlcontent-galleries";

    /** The gallery document type name for xml-pages. */
    public static final String TYPE_XMLPAGE_GALLERIES = "xmlpage-galleries";

    /** The log object for this class. */
    private static final Log LOG = CmsLog.getLog(CmsGallerySearchIndex.class);

    /**
     * Default constructor only intended to be used by the XML configuration. <p>
     * 
     * It is recommended to use the constructor <code>{@link #CmsGallerySearchIndex(String)}</code> 
     * as it enforces the mandatory name argument. <p>
     * 
     */
    public CmsGallerySearchIndex() {

        super();
        setRequireViewPermission(true);
    }

    /**
     * Creates a new gallery search index with the given name.<p>
     * 
     * @param name the system-wide unique name for the search index 
     * 
     * @throws CmsIllegalArgumentException if the given name is null, empty or already taken by another search index 
     * 
     */
    public CmsGallerySearchIndex(String name) throws CmsIllegalArgumentException {

        super();
        setName(name);
        setRequireViewPermission(true);
    }

    /**
     * Returns the Lucene document with the given structure id from the index.<p>
     * 
     * @param structureId the structure id of the document to retrieve  
     * 
     * @return the Lucene document with the given root path from the index
     * 
     * @deprecated Use {@link #getDocument(String, String)} instead and provide {@link CmsGallerySearchFieldMapping#FIELD_RESOURCE_STRUCTURE_ID} as field to search in
     */
    @Deprecated
    public Document getDocument(CmsUUID structureId) {

        return getDocument(CmsGallerySearchFieldMapping.FIELD_RESOURCE_STRUCTURE_ID, structureId.toString());
    }

    /**
     * @see org.opencms.search.CmsSearchIndex#getDocumentFactory(org.opencms.file.CmsResource)
     */
    @Override
    public I_CmsDocumentFactory getDocumentFactory(CmsResource res) {

        if ((res != null) && (m_sources != null)) {
            // the result can only be null or the type configured for the resource
            if (CmsResourceTypeXmlContent.isXmlContent(res)
                    || CmsResourceTypeXmlContainerPage.isContainerPage(res)) {
                return OpenCms.getSearchManager().getDocumentFactory(TYPE_XMLCONTENT_GALLERIES, null);
            } else if (CmsResourceTypeXmlPage.isXmlPage(res)) {
                return OpenCms.getSearchManager().getDocumentFactory(TYPE_XMLPAGE_GALLERIES, null);
            } else {
                return super.getDocumentFactory(res);
            }
        }
        return null;
    }

    /**
     * Returns the language locale for the given resource in this index.<p>
     * 
     * @param cms the current OpenCms user context
     * @param resource the resource to check
     * @param availableLocales a list of locales supported by the resource
     * 
     * @return the language locale for the given resource in this index
     */
    @Override
    public Locale getLocaleForResource(CmsObject cms, CmsResource resource, List<Locale> availableLocales) {

        Locale result;
        List<Locale> defaultLocales = OpenCms.getLocaleManager().getDefaultLocales(cms, resource);
        if ((availableLocales != null) && (availableLocales.size() > 0)) {
            result = OpenCms.getLocaleManager().getBestMatchingLocale(defaultLocales.get(0), defaultLocales,
                    availableLocales);
        } else {
            result = defaultLocales.get(0);
        }
        return result;
    }

    /**
     * Gets the search roots to use for the given site/subsite parameters.<p>
     *  
     * @param scope the search scope
     * @param subSiteParam the current subsite
     *  
     * @return the list of search roots for that option 
     */
    public List<String> getSearchRootsForScope(CmsGallerySearchScope scope, String subSiteParam) {

        List<String> result = new ArrayList<String>();
        if (scope.isIncludeSite()) {
            result.add("/");
        }
        if (scope.isIncludeSubSite() && (subSiteParam != null)) {
            result.add(subSiteParam);
        }
        if (scope.isIncludeShared()) {
            String sharedFolder = OpenCms.getSiteManager().getSharedFolder();
            if (sharedFolder != null) {
                result.add(sharedFolder);
            }
        }
        if (scope == CmsGallerySearchScope.siteShared) {
            result.add(FOLDER_SYTEM_MODULES);
            result.add(FOLDER_SYSTEM_GALLERIES);
        }
        return result;
    }

    /**
     * Performs a search on the gallery index.<p>
     * 
     * @param cms the current users OpenCms context
     * @param params the parameters to use for the search
     * 
     * @return the List of results found
     * 
     * @throws CmsSearchException if something goes wrong
     */
    public synchronized CmsGallerySearchResultList searchGallery(CmsObject cms, CmsGallerySearchParameters params)
            throws CmsSearchException {

        // the hits found during the search
        TopDocs hits;

        // storage for the results found
        CmsGallerySearchResultList searchResults = new CmsGallerySearchResultList();

        try {
            // copy the user OpenCms context
            CmsObject searchCms = OpenCms.initCmsObject(cms);
            // make sure to keep the request time when evaluating resource expiration
            searchCms.getRequestContext().setRequestTime(cms.getRequestContext().getRequestTime());

            // change the project     
            searchCms.getRequestContext().setCurrentProject(searchCms.readProject(getProject()));

            // several search options are searched using filters
            BooleanFilter filter = new BooleanFilter();
            // append root path filter
            List<String> folders = new ArrayList<String>();

            if (params.getFolders() != null) {
                folders.addAll(params.getFolders());
            }
            if (params.getGalleries() != null) {
                folders.addAll(params.getGalleries());
            }
            filter = appendPathFilter(searchCms, filter, folders);

            String subsite = null;
            if (params.getReferencePath() != null) {
                subsite = OpenCms.getADEManager().getSubSiteRoot(cms,
                        cms.getRequestContext().addSiteRoot(params.getReferencePath()));
                if (subsite != null) {
                    subsite = cms.getRequestContext().removeSiteRoot(subsite);
                }
            }
            List<String> scopeFolders = getSearchRootsForScope(params.getScope(), subsite);
            filter = appendPathFilter(searchCms, filter, scopeFolders);

            // append category filter
            filter = appendCategoryFilter(searchCms, filter, params.getCategories());
            // append container type filter
            filter = appendContainerTypeFilter(searchCms, filter, params.getContainerTypes());
            // append resource type filter
            filter = appendResourceTypeFilter(searchCms, filter, params.getResourceTypes());
            // append locale filter
            filter = appendLocaleFilter(searchCms, filter, params.getLocale());
            // append date last modified filter            
            filter = appendDateLastModifiedFilter(filter, params.getDateLastModifiedRange().getStartTime(),
                    params.getDateLastModifiedRange().getEndTime());
            // append date created filter
            filter = appendDateCreatedFilter(filter, params.getDateCreatedRange().getStartTime(),
                    params.getDateCreatedRange().getEndTime());

            // the search query to use, will be constructed in the next lines 
            Query query = null;
            // store separate fields query for excerpt highlighting  
            Query fieldsQuery = null;

            // get an index searcher that is certainly up to date
            indexSearcherUpdate();
            IndexSearcher searcher = getSearcher();

            Locale locale = params.getLocale() == null ? null : CmsLocaleManager.getLocale(params.getLocale());
            if (params.getSearchWords() != null) {
                // this search contains a full text search component
                BooleanQuery booleanFieldsQuery = new BooleanQuery();
                OpenCms.getLocaleManager();
                // extend the field names with the locale information
                List<String> fields = params.getFields();
                fields = getLocaleExtendedFields(params.getFields(), locale);
                // add one sub-query for each of the selected fields, e.g. "content", "title" etc.                
                for (String field : fields) {
                    QueryParser p = new QueryParser(CmsSearchIndex.LUCENE_VERSION, field, getAnalyzer());
                    booleanFieldsQuery.add(p.parse(params.getSearchWords()), BooleanClause.Occur.SHOULD);
                }
                fieldsQuery = searcher.rewrite(booleanFieldsQuery);
            }

            // finally set the main query to the fields query
            // please note that we still need both variables in case the query is a MatchAllDocsQuery - see below
            query = fieldsQuery;

            if (query == null) {
                // if no text query is set, then we match all documents 
                query = new MatchAllDocsQuery();
            }

            // perform the search operation          
            searcher.setDefaultFieldSortScoring(true, true);
            hits = searcher.search(query, filter, getMaxHits(), params.getSort());

            if (hits != null) {
                int hitCount = hits.totalHits > hits.scoreDocs.length ? hits.scoreDocs.length : hits.totalHits;
                int page = params.getResultPage();
                int start = -1, end = -1;
                if ((params.getMatchesPerPage() > 0) && (page > 0) && (hitCount > 0)) {
                    // calculate the final size of the search result
                    start = params.getMatchesPerPage() * (page - 1);
                    end = start + params.getMatchesPerPage();
                    // ensure that both i and n are inside the range of foundDocuments.size()
                    start = (start > hitCount) ? hitCount : start;
                    end = (end > hitCount) ? hitCount : end;
                } else {
                    // return all found documents in the search result
                    start = 0;
                    end = hitCount;
                }

                Document doc;
                CmsGallerySearchResult searchResult;
                CmsSearchParameters searchParams = params.getCmsSearchParams();

                int visibleHitCount = hitCount;
                for (int i = 0, cnt = 0; (i < hitCount) && (cnt < end); i++) {
                    try {
                        doc = searcher.doc(hits.scoreDocs[i].doc);
                        if (hasReadPermission(searchCms, doc)) {
                            // user has read permission
                            if (cnt >= start) {
                                // do not use the resource to obtain the raw content, read it from the lucene document!
                                String excerpt = null;
                                if (isCreatingExcerpt() && (fieldsQuery != null)) {
                                    I_CmsTermHighlighter highlighter = OpenCms.getSearchManager().getHighlighter();
                                    excerpt = highlighter.getExcerpt(doc, this, searchParams, fieldsQuery,
                                            getAnalyzer());
                                }
                                searchResult = new CmsGallerySearchResult(
                                        Math.round((hits.scoreDocs[i].score / hits.getMaxScore()) * 100f), doc,
                                        excerpt, locale);
                                searchResults.add(searchResult);
                            }
                            cnt++;
                        } else {
                            visibleHitCount--;
                        }
                    } catch (Exception e) {
                        // should not happen, but if it does we want to go on with the next result nevertheless                        
                        if (LOG.isWarnEnabled()) {
                            LOG.warn(Messages.get().getBundle().key(Messages.LOG_RESULT_ITERATION_FAILED_0), e);
                        }
                    }
                }

                // save the total count of search results
                searchResults.setHitCount(visibleHitCount);
            } else {
                searchResults.setHitCount(0);
            }

        } catch (RuntimeException e) {
            throw new CmsSearchException(Messages.get().container(Messages.ERR_SEARCH_PARAMS_1, params), e);
        } catch (Exception e) {
            throw new CmsSearchException(Messages.get().container(Messages.ERR_SEARCH_PARAMS_1, params), e);
        }

        return searchResults;
    }

    /**
     * Appends a container type filter to the given filter clause that matches all given container types.<p>
     * 
     * In case the provided List is null or empty, the original filter is left unchanged.<p>
     * 
     * The original filter parameter is extended and also provided as return value.<p> 
     * 
     * @param cms the current OpenCms search context
     * @param filter the filter to extend
     * @param containers the containers that will compose the filter
     * 
     * @return the extended filter clause
     */
    protected BooleanFilter appendContainerTypeFilter(CmsObject cms, BooleanFilter filter,
            List<String> containers) {

        if ((containers != null) && (containers.size() > 0)) {
            // add query categories (if required)
            filter.add(new FilterClause(
                    getMultiTermQueryFilter(CmsGallerySearchFieldMapping.FIELD_CONTAINER_TYPES, containers),
                    BooleanClause.Occur.MUST));
        }

        return filter;
    }

    /**
     * Appends the locale filter to the given filter clause that matches the given locale.<p>
     * 
     * In case the provided List is null or empty, the original filter is left unchanged.<p>
     * 
     * The original filter parameter is extended and also provided as return value.<p> 
     * 
     * @param cms the current OpenCms search context
     * @param filter the filter to extend
     * @param locale the locale that will compose the filter
     * 
     * @return the extended filter clause
     */
    protected BooleanFilter appendLocaleFilter(CmsObject cms, BooleanFilter filter, String locale) {

        if (locale != null) {
            // add query categories (if required)
            filter.add(new FilterClause(
                    getTermQueryFilter(CmsGallerySearchFieldMapping.FIELD_RESOURCE_LOCALES, locale),
                    BooleanClause.Occur.MUST));
        }

        return filter;
    }

    /**
     * Appends the a VFS path filter to the given filter clause that matches all given root paths.<p>
     * 
     * In case the provided List is null or empty, the current request context site root is appended.<p>
     * 
     * The original filter parameter is extended and also provided as return value.<p> 
     * 
     * @param cms the current OpenCms search context
     * @param filter the filter to extend
     * @param roots the VFS root paths that will compose the filter
     * 
     * @return the extended filter clause
     */
    @Override
    protected BooleanFilter appendPathFilter(CmsObject cms, BooleanFilter filter, List<String> roots) {

        // complete the search root
        TermsFilter pathFilter = new TermsFilter();
        String sharedFolder = OpenCms.getSiteManager().getSharedFolder();
        if ((roots != null) && (roots.size() > 0)) {
            // add the all configured search roots with will request context
            for (int i = 0; i < roots.size(); i++) {
                String searchRoot = roots.get(i);
                if (!searchRoot.startsWith(FOLDER_SYTEM_MODULES) && !searchRoot.startsWith(FOLDER_SYSTEM_GALLERIES)
                        && ((sharedFolder == null)
                                || !searchRoot.startsWith(OpenCms.getSiteManager().getSharedFolder()))) {
                    searchRoot = cms.getRequestContext().addSiteRoot(roots.get(i));
                }
                extendPathFilter(pathFilter, searchRoot);
            }
        } else {
            // use the current site root as the search root
            extendPathFilter(pathFilter, cms.getRequestContext().getSiteRoot());
            // also add the shared folder (v 8.0)
            extendPathFilter(pathFilter, OpenCms.getSiteManager().getSharedFolder());
            extendPathFilter(pathFilter, FOLDER_SYTEM_MODULES);
            extendPathFilter(pathFilter, FOLDER_SYSTEM_GALLERIES);
        }

        // add the calculated path filter for the root path
        filter.add(new FilterClause(pathFilter, BooleanClause.Occur.MUST));
        return filter;
    }

    /**
     * Checks if the provided resource should be excluded from this search index.<p> 
     *
     * With the introduction of the gallery search index in OpenCms 8, the meaning 
     * of the VFS property <code>search.exclude</code> that controls
     * if a resource is included in a search index has been extended.<p>
     *
     * The following uses cases can be covered with the property:<p>
     *
     * <dl>
     * <dt>Case A: Exclude from all indexes</dt>
     *      <dd>Applies at least to ADE resource type copy templates.<br>
     *      Set <code>search.exclude=all</code>
     *      </dd>
     *      
     * <dt>Case B: Include in all indexes</dt>
     *      <dd>Applies to most resources e.g. news articles etc.<br>
     *      Set <code>search.exclude=false</code> - or anything else but <code>all|true|gallery</code>.
     *      This is also the default in case the property is not set at all.
     *      </dd>
     *      
     * <dt>Case D: Include in gallery index, but exclude in standard index</dt>
     *      <dd>Applies to content like articles that are displayed only in container pages,
     *          also applies to "list generating" resource types like those that contain settings for a collector.<br>
     *      Set <code>search.exclude=true</code> - This is the behavior before OpenCms v8.
     *      </dd>
     *       
     * <dt>Case C: Exclude from gallery index, but include in standard index</dt>
     *      <dd>Use case so far unknown, but implemented anyway.<br>
     *      Set <code>search.exclude=gallery</code>.
     *      </dd> 
     * </dl>
     * 
     * @param cms the OpenCms context used for building the search index
     * @param resource the resource to index
     * 
     * @return true if the resource should be excluded, false if it should be included in this index
     */
    @Override
    protected boolean excludeFromIndex(CmsObject cms, CmsResource resource) {

        if (resource.isFolder() || resource.isTemporaryFile()) {
            // don't index  folders or temporary files for galleries, but pretty much everything else
            return true;
        }
        boolean excludeFromIndex = false;
        try {
            // do property lookup with folder search
            String propValue = cms.readPropertyObject(resource, CmsPropertyDefinition.PROPERTY_SEARCH_EXCLUDE, true)
                    .getValue();
            if (propValue != null) {
                propValue = propValue.trim();
                // property value was neither "true" nor null, must check for "all"
                excludeFromIndex = PROPERTY_SEARCH_EXCLUDE_VALUE_ALL.equalsIgnoreCase(propValue)
                        || PROPERTY_SEARCH_EXCLUDE_VALUE_GALLERY.equalsIgnoreCase(propValue);
            }
        } catch (CmsException e) {
            if (LOG.isDebugEnabled()) {
                LOG.debug(Messages.get().getBundle().key(Messages.LOG_UNABLE_TO_READ_PROPERTY_1,
                        resource.getRootPath()));
            }
        }
        return excludeFromIndex;
    }

    /**
     * Returns a list of locale extended field names.<p>
     * 
     * @param fields the field name to extend
     * @param locale the locale to extend the field names with
     * 
     * @return a list of locale extended field names
     */
    protected List<String> getLocaleExtendedFields(List<String> fields, Locale locale) {

        List<String> result = new ArrayList<String>(fields.size() * 2);
        for (String fieldName : fields) {
            result.add(fieldName);
            if (locale != null) {
                result.add(CmsGallerySearchFieldConfiguration.getLocaleExtendedName(fieldName, locale));
            } else {
                for (Locale l : OpenCms.getLocaleManager().getAvailableLocales()) {
                    result.add(CmsGallerySearchFieldConfiguration.getLocaleExtendedName(fieldName, l));
                }
            }
        }
        return result;
    }

}