fr.gael.dhus.service.SearchService.java Source code

Java tutorial

Introduction

Here is the source code for fr.gael.dhus.service.SearchService.java

Source

/*
 * Data Hub Service (DHuS) - For Space data distribution.
 * Copyright (C) 2013,2014,2015,2016 GAEL Systems
 *
 * This file is part of DHuS software sources.
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as
 * published by the Free Software Foundation, either version 3 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
package fr.gael.dhus.service;

import java.io.IOException;
import java.util.AbstractList;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;

import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.response.Suggestion;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.SolrInputDocument;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.security.access.prepost.PreAuthorize;
import org.springframework.stereotype.Service;

import fr.gael.dhus.database.object.Collection;
import fr.gael.dhus.database.object.MetadataIndex;
import fr.gael.dhus.database.object.Product;
import fr.gael.dhus.search.DHusSearchException;
import fr.gael.dhus.search.SolrDao;
import fr.gael.dhus.service.metadata.MetadataType;
import fr.gael.dhus.service.metadata.SolrField;

@Service
public class SearchService extends WebService {
    /** Logger. */
    private static final Logger LOGGER = LogManager.getLogger(SearchService.class);

    /** Autowired dependency. */
    @Autowired
    private SolrDao solrDao;

    /** Autowired dependency. */
    @Autowired
    private CollectionService collectionService;

    /** Autowired dependency. */
    @Autowired
    private ProductService productService;

    /** Autowired dependency. */
    @Autowired
    private MetadataTypeService metadataTypeService;

    /**
     * Adds a product to the Solr index, replacing any entry already stored for it.
     * <p>
     * {@link Product#getId()} is the unique key in the index.
     * Any exception raised while building or sending the document is caught and
     * logged; nothing is propagated to the caller.
     *
     * @param product a product.
     */
    public void index(Product product) {
        try {
            final long begin = System.currentTimeMillis();

            final SolrInputDocument document = toInputDocument(product);
            LOGGER.debug("Indexing product '" + product.getPath() + "'");

            // Solr/IO failures and any other exception are reported the same way below.
            solrDao.index(document);

            final long elapsed = System.currentTimeMillis() - begin;
            LOGGER.info("Indexed product in  " + elapsed + "ms");
        } catch (Exception e) {
            LOGGER.error("Cannot index product", e);
        }
    }

    /**
     * Removes the given product from the index.
     * <p>
     * This is a best-effort operation: failures are logged and never propagated.
     * A {@code null} product is reported in the log and otherwise ignored.
     *
     * @param product to remove.
     */
    public void remove(Product product) {
        if (product == null) {
            // Without this guard the error handler below would itself throw a
            // NullPointerException while building its message.
            LOGGER.error("Cannot remove a null product from index");
            return;
        }
        try {
            solrDao.remove(product.getId());
        } catch (Exception ex) {
            LOGGER.error("Cannot remove product " + product.getIdentifier() + " from index", ex);
        }
    }

    /**
     * Refreshes the index entry of the given product.
     * <p>
     * Simply delegates to {@link #index(Product)}.
     *
     * @param product to update.
     */
    public void update(Product product) {
        this.index(product);
    }

    /**
     * Paginated search for system operations.
     * <p>
     * The returned iterator resolves each Solr document to its product through
     * the product service. Its {@code remove()} deletes the product last
     * returned by {@code next()}.
     * <p>
     * If the search cannot be started, the error is logged and an empty iterator
     * is returned.
     *
     * @param query Solr query `q` parameter.
     * @return an iterator of found products, never {@code null}.
     */
    public Iterator<Product> search(String query) {
        try {
            final Iterator<SolrDocument> it = solrDao.scroll(new SolrQuery(query));

            return new Iterator<Product>() {
                /** Id of the product last returned by {@link #next()}. */
                private Long lastId;

                /** True when {@link #next()} has been called since the last removal. */
                private boolean removable;

                @Override
                public boolean hasNext() {
                    return it.hasNext();
                }

                @Override
                public Product next() {
                    Long id = (Long) it.next().get("id");
                    lastId = id;
                    removable = true;
                    return productService.getProduct(id);
                }

                @Override
                public void remove() {
                    // Iterator contract: remove the element last returned by next(),
                    // never advance the underlying cursor.
                    if (!removable) {
                        throw new IllegalStateException(
                                "next() must be called before remove(), and only once per element");
                    }
                    removable = false;
                    productService.deleteProduct(lastId);
                }
            };
        } catch (IOException | SolrServerException ex) {
            LOGGER.error("An exception occured while searching", ex);
        }
        return Collections.<Product>emptyIterator();
    }

    /**
     * Runs the given Solr query and returns the matching documents.
     * <p>
     * Pagination is controlled by the `start` and `rows` values of the SolrQuery parameter.<br>
     * <strong>If no `rows` have been set, solr will only return 10 documents, no more.</strong>
     * <p>
     * The total number of documents matching the query is available through
     * {@code res.getNumFound()}.
     * <p>
     * Note that the `q` parameter of the given query object is replaced by the
     * value returned by {@code solrDao.updateQuery(...)} before the search runs.
     *
     * @param query a SolrQuery with at least a 'q' parameter set.
     * @return A list of solr document matching the given query.
     * @throws NullPointerException if {@code query} is null.
     * @throws DHusSearchException if Solr cannot be queried.
     */
    @PreAuthorize("hasRole('ROLE_SEARCH')")
    public SolrDocumentList search(SolrQuery query) {
        Objects.requireNonNull(query);

        final String rewritten = solrDao.updateQuery(query.getQuery());
        query.setQuery(rewritten);

        final SolrDocumentList results;
        try {
            results = solrDao.search(query).getResults();
        } catch (SolrServerException | IOException ex) {
            LOGGER.error(ex);
            throw new DHusSearchException("An exception occured while searching", ex);
        }
        return results;
    }

    /**
     * Resolves a solr document to the product it describes.
     * <p>
     * The product is looked up by the value of the document's {@code id} field.
     *
     * @param doc Index entry for a product, as returned by {@link #search(SolrQuery)}.
     * @return A product (database object).
     */
    public Product asProduct(SolrDocument doc) {
        return productService.getProduct((Long) doc.get("id"));
    }

    /**
     * Returns how many solr documents match the given query.
     * <p>
     * Solr reports the count as a {@code long}; counts above
     * {@link Integer#MAX_VALUE} are clamped to {@code Integer.MAX_VALUE} instead
     * of wrapping around to a negative number.
     *
     * @param query a solr `q` query.
     * @return solr document count.
     * @throws DHusSearchException if Solr cannot be queried.
     * @deprecated use {@link #search(SolrQuery)}{@code .getNumFound()}.
     */
    @Deprecated
    @PreAuthorize("hasRole('ROLE_SEARCH')")
    public int getResultCount(String query) {
        try {
            SolrQuery countQuery = new SolrQuery(solrDao.updateQuery(query));
            // Only the hit count is needed, do not fetch any document.
            countQuery.setRows(0);

            long found = solrDao.search(countQuery).getResults().getNumFound();
            return (int) Math.min(found, Integer.MAX_VALUE);
        } catch (SolrServerException | IOException ex) {
            LOGGER.error(ex);
            throw new DHusSearchException("An exception occured while searching", ex);
        }
    }

    /**
     * Returns a list of suggestions for the given input.
     * <p>
     * The returned list is a read-only view over the terms of the suggestions
     * registered under the {@code "suggest"} key of the Solr response. An empty
     * list is returned when there is no such entry or when Solr cannot be
     * queried (the failure is logged).
     *
     * @param input search input.
     * @return list of suggestions, never {@code null}.
     */
    @PreAuthorize("hasRole('ROLE_SEARCH')")
    public List<String> getSuggestions(String input) {
        try {
            final List<Suggestion> sggs = solrDao.getSuggestions(input).getSuggestions().get("suggest");
            if (sggs == null) {
                // No entry under "suggest": without this guard the view below
                // would throw a NullPointerException on first use.
                return Collections.emptyList();
            }
            return new AbstractList<String>() {
                @Override
                public String get(int index) {
                    return sggs.get(index).getTerm();
                }

                @Override
                public int size() {
                    return sggs.size();
                }
            };
        } catch (IOException | SolrServerException ex) {
            LOGGER.error("Cannot get suggestions from Solr", ex);
        }
        return Collections.emptyList();
    }

    /**
     * Integrity check.
     * <p>
     * Scrolls over every document of the index and removes the entries whose
     * product is unknown to the product service. Failures are logged, never
     * propagated.
     */
    public void checkIndex() {
        try {
            SolrQuery query = new SolrQuery("*:*");
            query.setFilterQueries("*");
            query.setStart(0);

            for (Iterator<SolrDocument> docs = solrDao.scroll(query); docs.hasNext();) {
                SolrDocument doc = docs.next();
                Long pid = (Long) doc.get("id");
                if (productService.systemGetProduct(pid) != null) {
                    // The product still exists, the index entry is legitimate.
                    continue;
                }

                Long id = (Long) doc.getFieldValue("id");
                LOGGER.warn("Removing unknown product " + id + " from solr index");
                try {
                    solrDao.remove(id);
                    // decrease the offset, because a product has been removed
                    query.setStart(query.getStart() - 1);
                } catch (IOException e) {
                    LOGGER.error("Cannot remove Solr entry " + id, e);
                }
            }
        } catch (IOException | SolrServerException ex) {
            LOGGER.error("Cannot check the index", ex);
        }
    }

    /**
     * Optimizes the index, i.e. merges every segment of the index into one monolithic file.
     * <p>
     * Optimizing is very expensive, and the slight performance boost does not last
     * long on an index that is constantly changing, so the tradeoff is not often
     * worth it for a non static index.
     * <p>
     * Blocking method, will block until optimization is complete. Solr won't respond to
     * search queries until optimization is done.
     * <p>
     * Failures are logged, never propagated.
     */
    public void optimizeIndex() {
        try {
            this.solrDao.optimize();
        } catch (SolrServerException | IOException failure) {
            LOGGER.error("Cannot optimize index", failure);
        }
    }

    /**
     * Wipes the current index and reindexes everything from the DataBase.
     * <p>
     * Solr is switched to a bulk-indexing configuration for the duration of the
     * operation; that configuration is always reverted, even when indexing or
     * optimizing fails. Failures are logged, never propagated.
     * <p>
     * Note that the index is wiped before the products are read: if the
     * database holds no product, the index is left empty.
     */
    public void fullReindex() {
        try {
            solrDao.removeAll();

            long start = System.currentTimeMillis();

            final Iterator<Product> products = productService.systemGetProducts(null, null, 0);

            if (!products.hasNext()) {
                LOGGER.warn("Reindex: table PRODUCTS is empty, aborting...");
                return;
            }

            // Makes an adaptor for SolrDao#batchIndex(...)
            Iterator<SolrInputDocument> it = new Iterator<SolrInputDocument>() {
                @Override
                public boolean hasNext() {
                    return products.hasNext();
                }

                @Override
                public SolrInputDocument next() {
                    Product product = products.next();
                    // Metadata indexes are set explicitly before the conversion.
                    product.setIndexes(productService.getIndexes(product.getId()));
                    return toInputDocument(product);
                }

                @Override
                public void remove() {
                    throw new UnsupportedOperationException("Do not use remove().");
                }
            };

            // Best config for bulk reindex
            // see: http://lucidworks.com/blog/2013/08/23/understanding-transaction-logs-softcommit-and-commit-in-sorlcloud/
            Map<String, String> config = new HashMap<>();
            config.put("updateHandler.autoSoftCommit.maxDocs", "-1"); // Opens a new searcher (the slowest operation).
            config.put("updateHandler.autoSoftCommit.maxTime", "-1"); // Opens a new searcher (the slowest operation).
            config.put("updateHandler.autoCommit.maxDocs", "-1"); // Time based autocommit is better.
            config.put("updateHandler.autoCommit.maxTime", "60000"); // 1 minute, controls the size of tlog files.
            config.put("updateHandler.autoCommit.openSearcher", "false"); // Opens a new searcher (the slowest operation).
            solrDao.setProperties(config);

            try {
                solrDao.batchIndex(it);
                solrDao.optimize();
            } finally {
                // Never leave Solr in bulk-indexing mode, even if indexing failed.
                solrDao.unsetProperties(config.keySet());
            }

            LOGGER.info("Full reindex done in " + (System.currentTimeMillis() - start) + "ms");
        } catch (IOException | SolrServerException ex) {
            LOGGER.error("Failed to reindex", ex);
        }
    }

    /**
     * Makes a SolrInputDocument from a Product database object.
     * The returned document can be indexed as is.
     * <p>
     * The document contains:
     * <ul>
     * <li>{@code contents}: the value of every metadata index whose type is
     *     absent, empty or {@code text/plain} (full-text search),</li>
     * <li>one field per metadata index that has a declared solr field or a
     *     queryable name (the latter lower-cased),</li>
     * <li>the DHuS attributes {@code id}, {@code uuid} and {@code path},</li>
     * <li>{@code collection}: the name of every collection holding the product.</li>
     * </ul>
     *
     * @param product to convert.
     * @return an indexable solr document.
     */
    private SolrInputDocument toInputDocument(Product product) {
        String path = product.getPath().toString();
        if (path.startsWith("/")) { // FIXME: should be done by the ingestion process!
            path = "file:/" + path;
        }

        SolrInputDocument doc = new SolrInputDocument();

        // Metadatas
        List<MetadataIndex> indices = product.getIndexes();
        if (indices != null && !indices.isEmpty()) {
            for (MetadataIndex index : indices) {
                String type = index.getType();

                // Only textual information stored in field contents (full-text search)
                if ((type == null) || type.isEmpty() || "text/plain".equals(type)) {
                    doc.addField("contents", index.getValue());
                }

                MetadataType mt = metadataTypeService.getMetadataTypeByName(product.getItemClass(),
                        index.getName());
                SolrField sf = (mt != null) ? mt.getSolrField() : null;

                if (sf != null || index.getQueryable() != null) {
                    // A declared solr field takes precedence over the queryable name.
                    // Locale.ROOT keeps field names independent of the JVM default
                    // locale (e.g. 'I' lower-cases to a dotless 'ı' in Turkish).
                    String fieldName = (sf != null)
                            ? sf.getName()
                            : index.getQueryable().toLowerCase(Locale.ROOT);
                    boolean multiValued = (sf != null) && Boolean.TRUE.equals(sf.isMultiValued());

                    if (multiValued) {
                        // Accumulate values.
                        doc.addField(fieldName, index.getValue());
                    } else {
                        // Single-valued: a later index with the same field name overwrites.
                        doc.setField(fieldName, index.getValue());
                    }
                }
            }
        } else {
            LOGGER.warn("Product '" + product.getIdentifier() + "' contains no metadata");
        }

        // DHuS Attributes
        doc.setField("id", product.getId());
        doc.setField("uuid", product.getUuid());
        doc.setField("path", path);

        // Collections
        List<Collection> collections = collectionService.getCollectionsOfProduct(product);
        if (collections != null) {
            for (Collection collection : collections) {
                doc.addField("collection", collection.getName());
            }
        }

        return doc;
    }
}