dk.dma.msinm.service.MessageSearchService.java Source code

Java tutorial

Introduction

Here is the source code for dk.dma.msinm.service.MessageSearchService.java

Source

/* Copyright (c) 2011 Danish Maritime Authority
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 3 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this library.  If not, see <http://www.gnu.org/licenses/>.
 */
package dk.dma.msinm.service;

import com.spatial4j.core.context.SpatialContext;
import com.spatial4j.core.shape.Shape;
import dk.dma.msinm.common.MsiNmApp;
import dk.dma.msinm.common.db.PredicateHelper;
import dk.dma.msinm.common.model.DataFilter;
import dk.dma.msinm.common.settings.annotation.Setting;
import dk.dma.msinm.common.util.TextUtils;
import dk.dma.msinm.lucene.AbstractLuceneIndex;
import dk.dma.msinm.model.Area;
import dk.dma.msinm.model.AreaDesc;
import dk.dma.msinm.model.Category;
import dk.dma.msinm.model.CategoryDesc;
import dk.dma.msinm.model.Chart;
import dk.dma.msinm.model.Location;
import dk.dma.msinm.model.LocationDesc;
import dk.dma.msinm.model.Message;
import dk.dma.msinm.model.PointDesc;
import dk.dma.msinm.model.SeriesIdentifier;
import dk.dma.msinm.model.Type;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.queries.ChainedFilter;
import org.apache.lucene.search.Filter;
import org.apache.lucene.spatial.SpatialStrategy;
import org.apache.lucene.spatial.prefix.RecursivePrefixTreeStrategy;
import org.apache.lucene.spatial.prefix.tree.GeohashPrefixTree;
import org.apache.lucene.spatial.prefix.tree.SpatialPrefixTree;
import org.apache.lucene.spatial.query.SpatialArgs;
import org.apache.lucene.spatial.query.SpatialOperation;
import org.slf4j.Logger;

import javax.annotation.PostConstruct;
import javax.annotation.PreDestroy;
import javax.ejb.Lock;
import javax.ejb.LockType;
import javax.ejb.Schedule;
import javax.ejb.Singleton;
import javax.ejb.Startup;
import javax.inject.Inject;
import javax.persistence.EntityManager;
import javax.persistence.Tuple;
import javax.persistence.criteria.CriteriaBuilder;
import javax.persistence.criteria.CriteriaQuery;
import javax.persistence.criteria.Join;
import javax.persistence.criteria.JoinType;
import javax.persistence.criteria.Predicate;
import javax.persistence.criteria.Root;
import javax.persistence.criteria.Selection;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;

/**
 * Lucene search index for {@code Message} entities
 */
@Singleton
@Lock(LockType.READ)
@Startup
public class MessageSearchService extends AbstractLuceneIndex<Message> {

    // Base name of the free-text search field; searchField() appends "_" and a language to it
    final static String SEARCH_FIELD = "message";
    // Field name handed to the spatial strategy created in init()
    final static String LOCATION_FIELD = "location";
    // NOTE(review): not referenced within this class; package-private, so it may be used elsewhere
    final static String STATUS_FIELD = "status";

    // Used to build and run the JPA criteria query in searchPagedMessageIds()
    @Inject
    EntityManager em;

    @Inject
    Logger log;

    // Supplies the list of languages to index and resolves the language used for searching
    @Inject
    MsiNmApp app;

    // Source of updated messages for indexing, cached messages for results and the bookmarks
    @Inject
    MessageService messageService;

    // Supplies the firing exercises category passed along with the search result
    @Inject
    CategoryService categoryService;

    // In map mode, search() flags the result as overflowed when more message ids than this are found
    @Inject
    @Setting(value = "messageIndexMaxMessageNo", defaultValue = "1000")
    Long maxMessageNo;

    // Folder holding the Lucene index; created by init() if it does not exist
    @Inject
    @Setting(value = "messageIndexDir", defaultValue = "${user.home}/.msinm/msg-index", substituteSystemProperties = true)
    Path indexFolder;

    // Number of levels of the geohash prefix tree created in init()
    @Inject
    @Setting(value = "messageIndexSpatialLevels", defaultValue = "11")
    Long maxSpatialLevels; // a value of 11 results in sub-meter precision for geohash

    // When true, init() calls deleteIndex() on start-up
    @Inject
    @Setting(value = "messageIndexDeleteOnStartup", defaultValue = "true")
    Boolean deleteOnStartup;

    // Spatial strategy used both for indexing shapes and for building location filters; assigned in init()
    SpatialStrategy strategy;
    // Set to true by findUpdatedEntities() once a batch smaller than the requested maximum
    // has been returned; never reset within this class
    boolean allIndexed;

    /**
     * Prepares the index once all dependencies have been injected.
     * <p>
     * Creates the index folder if it is missing, sets up the geohash based
     * spatial strategy for the location field and, if so configured, deletes
     * the existing index. I/O errors are logged and not propagated.
     */
    @PostConstruct
    public void init() {
        // Make sure the Lucene index directory is present
        try {
            if (!Files.exists(indexFolder)) {
                Files.createDirectories(indexFolder);
            }
        } catch (IOException e) {
            log.error("Error creating index dir " + indexFolder, e);
        }

        // Set up the spatial strategy on top of a geohash grid with the configured number of levels
        int gridLevels = maxSpatialLevels.intValue();
        strategy = new RecursivePrefixTreeStrategy(
                new GeohashPrefixTree(SpatialContext.GEO, gridLevels),
                LOCATION_FIELD);

        // Nothing more to do unless the old index should be dropped on start-up
        if (!deleteOnStartup) {
            return;
        }
        try {
            deleteIndex();
        } catch (IOException e) {
            log.error("Failed re-creating the index on startup", e);
        }
    }

    /**
     * Tells whether the indexer has caught up with the updated messages.
     *
     * @return {@code true} once {@code findUpdatedEntities} has returned fewer
     *         messages than the maximum it was asked for, {@code false} before that
     */
    public boolean isAllIndexed() {
        return this.allIndexed;
    }

    /**
     * Returns the name of the language specific search field.
     * <p>
     * The name is the common search field prefix followed by an underscore and
     * the language as resolved by the application.
     *
     * @param language the requested language
     * @return the name of the search field for that language
     */
    private String searchField(String language) {
        final String prefix = SEARCH_FIELD + "_";
        return prefix + app.getLanguage(language);
    }

    /**
     * Releases the Lucene index reader before this bean is destroyed.
     */
    @PreDestroy
    public void closeIndex() {
        this.closeReader();
    }

    /**
     * Scheduled index update, fired at second 38 of every minute.
     * <p>
     * Delegates to the two-argument overload with {@code MAX_INDEX_COUNT} and {@code false}.
     *
     * @return the value returned by the two-argument overload
     */
    @Schedule(
            second = "38",
            minute = "*/1",
            hour = "*",
            dayOfWeek = "*",
            year = "*",
            persistent = false)
    public int updateLuceneIndex() {
        return this.updateLuceneIndex(MAX_INDEX_COUNT, false);
    }

    /**
     * {@inheritDoc}
     * <p>
     * This implementation returns the configured message index folder.
     */
    @Override
    protected Path getIndexFolder() {
        return this.indexFolder;
    }

    /**
     * {@inheritDoc}
     * <p>
     * Looks up the updated messages via the message service. As a side effect,
     * the first batch that is smaller than {@code maxCount} marks the indexing
     * as complete.
     *
     * @param fromDate the date passed on to the message service lookup
     * @param maxCount the maximum number of messages to fetch
     * @return the messages returned by the message service
     */
    @Override
    protected List<Message> findUpdatedEntities(Date fromDate, int maxCount) {
        final List<Message> updated = messageService.findUpdatedMessages(fromDate, maxCount);

        // A batch that is not full means there is nothing left to index for now
        final boolean batchNotFull = updated.size() < maxCount;
        if (batchNotFull) {
            allIndexed = true;
        }

        return updated;
    }

    /**
     * Adds a shape to the document, both as the indexable fields produced by
     * the spatial strategy and as a stored field holding the string form of
     * the shape.
     *
     * @param doc the Lucene document to update
     * @param shape the shape to add
     * @return the same document instance that was passed in
     */
    private Document addShapeSearchFields(Document doc, Shape shape) {
        // Indexable representation of the shape
        for (IndexableField shapeField : strategy.createIndexableFields(shape)) {
            doc.add(shapeField);
        }

        // Stored string representation of the shape, under the strategy's field name
        StoredField storedShape = new StoredField(strategy.getFieldName(), shape.toString());
        doc.add(storedShape);

        return doc;
    }

    /**
     * {@inheritDoc}
     * <p>
     * This implementation accepts every message.
     */
    @Override
    protected boolean shouldAddEntity(Message entity) {
        // All messages are indexed irrespective of their status. A check that
        // excluded messages with status DELETED used to be sketched here but
        // is not applied.
        return true;
    }

    /**
     * {@inheritDoc}
     * <p>
     * For every language known to the application, the searchable texts of the
     * message are added to the search field of that language: status, series
     * identifier parts, references, area and category names (including their
     * ancestors), chart numbers, horizontal datum, message descriptions, lights
     * list numbers and the descriptions of locations and points. Finally the
     * shape of each location is added to the spatial part of the index; a
     * location whose shape cannot be added is skipped with a warning.
     *
     * @param doc the Lucene document to fill in
     * @param message the message to index
     */
    @Override
    protected void addEntityToDocument(Document doc, Message message) {

        // One search field per supported language
        for (String language : app.getLanguages()) {
            final String searchField = searchField(language);
            final SeriesIdentifier seriesId = message.getSeriesIdentifier();

            addPhraseSearchField(doc, searchField, message.getStatus());

            // The series identifier in its various forms
            addPhraseSearchField(doc, searchField, seriesId.getShortId()); // e.g. "DK-074-14"
            addPhraseSearchField(doc, searchField, seriesId.getFullId()); // e.g. "MSI-DK-074-14"
            addPhraseSearchField(doc, searchField, seriesId.getAuthority());
            addPhraseSearchField(doc, searchField, String.valueOf(seriesId.getYear()));
            if (seriesId.getNumber() != null) {
                addPhraseSearchField(doc, searchField, String.valueOf(seriesId.getNumber()));
            }

            // Identifiers of referenced messages
            message.getReferences().forEach(reference -> {
                addPhraseSearchField(doc, searchField, reference.getSeriesIdentifier().getShortId());
                addPhraseSearchField(doc, searchField, reference.getSeriesIdentifier().getFullId());
            });

            // Names of the message area and all of its ancestors
            Area currentArea = message.getArea();
            while (currentArea != null) {
                AreaDesc areaDesc = currentArea.getDesc(language);
                if (areaDesc != null) {
                    addPhraseSearchField(doc, searchField, areaDesc.getName());
                }
                currentArea = currentArea.getParent();
            }

            // Names of each category and all of its ancestors
            for (Category category : message.getCategories()) {
                Category currentCategory = category;
                while (currentCategory != null) {
                    CategoryDesc categoryDesc = currentCategory.getDesc(language);
                    if (categoryDesc != null) {
                        addPhraseSearchField(doc, searchField, categoryDesc.getName());
                    }
                    currentCategory = currentCategory.getParent();
                }
            }

            // National and international chart numbers
            message.getCharts().forEach(chart -> {
                addPhraseSearchField(doc, searchField, chart.getChartNumber());
                addPhraseSearchField(doc, searchField, chart.getInternationalNumber());
            });

            // Horizontal datum
            addPhraseSearchField(doc, searchField, message.getHorizontalDatum());

            // Texts of the message descriptions.
            // NOTE(review): all descriptions are added to the field of every language,
            // i.e. they are not filtered by the current language - confirm this is intended.
            message.getDescs().forEach(msgDesc -> {
                addPhraseSearchField(doc, searchField, msgDesc.getTitle());
                addPhraseSearchField(doc, searchField, TextUtils.html2txt(msgDesc.getDescription()));
                addPhraseSearchField(doc, searchField, msgDesc.getNote());
                addPhraseSearchField(doc, searchField, msgDesc.getOtherCategories());
                addPhraseSearchField(doc, searchField, msgDesc.getVicinity());
                addPhraseSearchField(doc, searchField, msgDesc.getPublication());
                addPhraseSearchField(doc, searchField, msgDesc.getSource());
            });

            // Lights list numbers
            message.getLightsListNumbers().forEach(number -> addPhraseSearchField(doc, searchField, number));

            // Descriptions of the locations and of their points, in the current language
            for (Location location : message.getLocations()) {
                LocationDesc locationDesc = location.getDesc(language);
                if (locationDesc != null) {
                    addPhraseSearchField(doc, searchField, locationDesc.getDescription());
                }
                location.getPoints().forEach(point -> {
                    PointDesc pointDesc = point.getDesc(language);
                    if (pointDesc != null) {
                        addPhraseSearchField(doc, searchField, pointDesc.getDescription());
                    }
                });
            }
        }

        // Spatial data: one shape per location; a failing location must not prevent
        // the rest of the message from being indexed
        for (Location location : message.getLocations()) {
            try {
                addShapeSearchFields(doc, location.toWkt());
            } catch (Exception e) {
                log.warn("Not indexing location for message " + message.getId() + " because of error " + e);
            }
        }
    }

    /**
     * Builds a Lucene filter that matches documents intersecting at least one
     * of the given locations. One spatial filter is created per location and
     * the filters are chained with OR.
     *
     * @param locations the locations to produce a filter for
     * @return the chained filter, or {@code null} if no locations are defined
     * @throws ParseException declared for failures when converting a location to a shape
     */
    public Filter getLocationFilter(List<Location> locations) throws ParseException {
        if (locations.isEmpty()) {
            return null;
        }

        Filter[] perLocation = new Filter[locations.size()];
        int next = 0;
        for (Location location : locations) {
            SpatialArgs intersects = new SpatialArgs(SpatialOperation.Intersects, location.toWkt());
            perLocation[next++] = strategy.makeFilter(intersects);
        }
        return new ChainedFilter(perLocation, ChainedFilter.OR);
    }

    /**
     * Main search method.
     * <p>
     * First resolves the paged list of matching message ids, then loads the
     * corresponding cached messages into the result. In map mode, if more ids
     * are found than the configured maximum, no messages are loaded and the
     * result is flagged as overflowed instead. Any exception is logged, and
     * the result is returned in whatever state it had reached.
     *
     * @param param the search parameters
     * @return the search result
     */
    public MessageSearchResult search(MessageSearchParams param) {
        final long startedAt = System.currentTimeMillis();
        final MessageSearchResult result = new MessageSearchResult();
        result.setStartIndex(param.getStartIndex());

        try {
            // Step 1: resolve the paged list of message ids
            final List<Integer> pagedMsgIds = searchPagedMessageIds(param, result);

            // Step 2: load the messages for those ids, unless there are too many for map mode
            final boolean tooManyForMap = param.isMapMode() && pagedMsgIds.size() > maxMessageNo.intValue();
            if (tooManyForMap) {
                // Will typically only happen in map view mode. Flagging the overflow
                // lets the client e.g. show a bitmap layer instead.
                result.setOverflowed(true);
            } else {
                final List<Message> cached = messageService.getCachedMessages(pagedMsgIds);

                // Map mode only needs locations and titles; list mode needs the details
                final DataFilter dataFilter = (param.isMapMode()
                        ? DataFilter.get("Message.locations", "MessageDesc.title")
                        : DataFilter.get("Message.details", "Message.firingExercise", "Area.parent", "Category.parent"))
                        .setLang(param.getLanguage());

                result.addMessages(
                        cached,
                        messageService.getBookmarks(),
                        categoryService.findOrCreateFiringExercisesCategory(),
                        dataFilter);
            }

            log.trace("Message search result: " + result + " in " + (System.currentTimeMillis() - startedAt) + " ms");

        } catch (Exception e) {
            log.error("Error performing search " + param + ": " + e, e);
        }

        return result;
    }

    /**
     * Searches out the ID's of the paged result set of messages defined by the search parameters.
     * Also fills out the total result count of the message search result.
     * <p>
     * The ids of all matching messages are fetched with a single criteria query, sorted as
     * requested, and the page is then cut out of that list in memory. The page bounds are
     * clamped to the result size, so a start index beyond the end, a negative start index or
     * max hits value, or a max hits value of {@code Integer.MAX_VALUE} all yield a valid
     * (possibly empty) page rather than an exception.
     *
     * @param param the search parameters
     * @param result the search result to update with the total result count
     * @return the paged list of message ID's
     * @throws Exception if the Lucene search or the database query fails
     */
    List<Integer> searchPagedMessageIds(MessageSearchParams param, MessageSearchResult result) throws Exception {

        CriteriaBuilder builder = em.getCriteriaBuilder();
        CriteriaQuery<Tuple> tupleQuery = builder.createTupleQuery();

        // Select messages
        Root<Message> msgRoot = tupleQuery.from(Message.class);
        msgRoot.join("seriesIdentifier", JoinType.LEFT);
        javax.persistence.criteria.Path<SeriesIdentifier> msgId = msgRoot.get("seriesIdentifier");

        // Build the predicates based on the search parameters
        PredicateHelper<Tuple> tuplePredicateBuilder = new PredicateHelper<>(builder, tupleQuery)
                .equals(msgRoot.get("status"), param.getStatus())
                .between(msgRoot.get("created"), param.getFrom(), param.getTo())
                .between(msgRoot.get("updated"), param.getUpdatedFrom(), param.getUpdatedTo());

        // Compute the type closure: the explicitly requested types plus
        // every type that belongs to one of the requested main types
        Set<Type> types = new HashSet<>();
        types.addAll(param.getTypes());
        param.getMainTypes().forEach(mt -> {
            for (Type t : Type.values()) {
                if (t.getSeriesIdType() == mt) {
                    types.add(t);
                }
            }
        });

        if (types.size() > 0) {
            tuplePredicateBuilder.in(msgRoot.get("type"), types);
        }

        // Search the Lucene index for free text search and location information,
        // and restrict the database query to the ids found there
        if (param.requiresLuceneSearch()) {
            Filter filter = null;
            if (param.getLocations() != null) {
                filter = getLocationFilter(param.getLocations());
            }
            List<Long> ids = searchIndex(param.getQuery(), searchField(param.getLanguage()), filter,
                    Integer.MAX_VALUE);
            tuplePredicateBuilder.in(msgRoot.get("id"), ids);
        }

        // Filter on bookmarked items
        if (param.isBookmarks()) {
            tuplePredicateBuilder.in(msgRoot.get("id"), messageService.getBookmarks());
        }

        // If we search by area or sort by area, join over...
        javax.persistence.criteria.Path<Area> areaRoot = null;
        if (param.getSortBy() == MessageSearchParams.SortBy.AREA || param.getAreaIds().size() > 0) {

            msgRoot.join("area", JoinType.LEFT);
            areaRoot = msgRoot.get("area");

            // Filter on areas
            if (param.getAreaIds().size() > 0) {

                // Note to self: A more efficient way would be to join on area and match
                // the lineage of the joined area with that of the message area...
                // A message matches if the lineage of its area starts with the lineage
                // of any of the requested areas.
                // NOTE(review): em.find() returns null for an unknown id, which makes
                // the getLineage() call below throw a NullPointerException.
                Predicate[] areaMatch = new Predicate[param.getAreaIds().size()];
                Iterator<Integer> i = param.getAreaIds().iterator();
                for (int x = 0; x < areaMatch.length; x++) {
                    String lineage = em.find(Area.class, i.next()).getLineage();
                    areaMatch[x] = builder.like(areaRoot.get("lineage"), lineage + "%");
                }
                tuplePredicateBuilder.add(builder.or(areaMatch));
            }
        }

        // Filter on categories, using the same lineage prefix match as for areas
        if (param.getCategoryIds().size() > 0) {

            Join<Message, Category> categories = msgRoot.join("categories", JoinType.LEFT);
            Predicate[] categoryMatch = new Predicate[param.getCategoryIds().size()];
            Iterator<Integer> i = param.getCategoryIds().iterator();
            for (int x = 0; x < categoryMatch.length; x++) {
                String lineage = em.find(Category.class, i.next()).getLineage();
                categoryMatch[x] = builder.like(categories.get("lineage"), lineage + "%");
            }
            tuplePredicateBuilder.add(builder.or(categoryMatch));
        }

        // Filter on charts
        if (param.getChartIds().size() > 0) {

            Join<Message, Chart> charts = msgRoot.join("charts", JoinType.LEFT);
            Predicate[] chartMatch = new Predicate[param.getChartIds().size()];
            Iterator<Integer> i = param.getChartIds().iterator();
            for (int x = 0; x < chartMatch.length; x++) {
                chartMatch[x] = builder.equal(charts.get("id"), i.next());
            }
            tuplePredicateBuilder.add(builder.or(chartMatch));
        }

        // Determine the fields to fetch. The message id always comes first;
        // the sort columns are selected as well, as the query is distinct.
        List<Selection<?>> fields = new ArrayList<>();
        fields.add(msgRoot.get("id"));
        if (MessageSearchParams.SortBy.DATE == param.getSortBy()) {
            fields.add(msgRoot.get("validFrom"));
        } else if (MessageSearchParams.SortBy.ID == param.getSortBy()) {
            fields.add(msgId.get("year"));
            fields.add(msgId.get("number"));
        } else if (MessageSearchParams.SortBy.AREA == param.getSortBy()) {
            // areaRoot is non-null here: it is always assigned when sorting by area
            fields.add(areaRoot.get("treeSortOrder"));
        }

        // Complete the query and fetch the message id's (and validFrom, year and number for sorting)
        tupleQuery.multiselect(fields.toArray(new Selection<?>[fields.size()])).distinct(true)
                .where(tuplePredicateBuilder.where());

        // Sort the query; the message id is always the last sort key
        if (MessageSearchParams.SortBy.DATE == param.getSortBy()) {
            if (param.getSortOrder() == MessageSearchParams.SortOrder.ASC) {
                tupleQuery.orderBy(builder.asc(msgRoot.get("validFrom")), builder.asc(msgRoot.get("id")));
            } else {
                tupleQuery.orderBy(builder.desc(msgRoot.get("validFrom")), builder.desc(msgRoot.get("id")));
            }
        } else if (MessageSearchParams.SortBy.ID == param.getSortBy()) {
            if (param.getSortOrder() == MessageSearchParams.SortOrder.ASC) {
                tupleQuery.orderBy(builder.asc(msgId.get("year")), builder.asc(msgId.get("number")),
                        builder.asc(msgRoot.get("id")));
            } else {
                tupleQuery.orderBy(builder.desc(msgId.get("year")), builder.desc(msgId.get("number")),
                        builder.desc(msgRoot.get("id")));
            }
        } else if (MessageSearchParams.SortBy.AREA == param.getSortBy()) {
            if (param.getSortOrder() == MessageSearchParams.SortOrder.ASC) {
                tupleQuery.orderBy(builder.asc(areaRoot.get("treeSortOrder")), builder.asc(msgRoot.get("id")));
            } else {
                tupleQuery.orderBy(builder.desc(areaRoot.get("treeSortOrder")), builder.desc(msgRoot.get("id")));
            }
        }

        // Execute the query
        List<Tuple> totalResult = em.createQuery(tupleQuery).getResultList();

        // Register the total result
        result.setTotal(totalResult.size());

        // The message id is the first element of each tuple
        List<Integer> msgIds = totalResult.stream().map(t -> (Integer) t.get(0)).collect(Collectors.toList());

        // Extract and return the paged sub-list. The page size is clamped to the number
        // of remaining ids BEFORE it is added to the start index, so the end index cannot
        // overflow for large max hits values (e.g. Integer.MAX_VALUE), and negative
        // values cannot produce an invalid range.
        int total = msgIds.size();
        int fromIndex = Math.max(0, Math.min(param.getStartIndex(), total));
        int pageSize = Math.max(0, Math.min(param.getMaxHits(), total - fromIndex));
        return msgIds.subList(fromIndex, fromIndex + pageSize);
    }

}